Example #1
 def save_pipeline(job_id, role, party_id, model_id, model_version):
     schedule_logger(job_id).info(
         'job {} on {} {} start to save pipeline'.format(
             job_id, role, party_id))
     job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
         job_id=job_id, role=role, party_id=party_id)
     job_parameters = job_runtime_conf.get('job_parameters', {})
     job_type = job_parameters.get('job_type', '')
     if job_type == 'predict':
         return
     dag = job_utils.get_job_dsl_parser(
         dsl=job_dsl,
         runtime_conf=job_runtime_conf,
         train_runtime_conf=train_runtime_conf)
     predict_dsl = dag.get_predict_dsl(role=role)
     pipeline = pipeline_pb2.Pipeline()
     pipeline.inference_dsl = json_dumps(predict_dsl, byte=True)
     pipeline.train_dsl = json_dumps(job_dsl, byte=True)
     pipeline.train_runtime_conf = json_dumps(job_runtime_conf, byte=True)
     pipeline.fate_version = RuntimeConfig.get_env("FATE")
     pipeline.model_id = model_id
     pipeline.model_version = model_version
     job_tracker = Tracking(job_id=job_id,
                            role=role,
                            party_id=party_id,
                            model_id=model_id,
                            model_version=model_version)
     job_tracker.save_pipeline(pipelined_buffer_object=pipeline)
     schedule_logger(job_id).info(
         'job {} on {} {} save pipeline successfully'.format(
             job_id, role, party_id))
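The pipeline buffer written here is what Example #22 later reads back through get_output_model('pipeline'). A minimal sketch of that read path, reusing the names from the calls above (the surrounding setup is assumed):

# Sketch: loading the pipeline saved by save_pipeline() (illustrative only)
job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id,
                       model_id=model_id, model_version=model_version)
pipeline_model = job_tracker.get_output_model('pipeline')
inference_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)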
Example #2
def component_metric_all():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'],
                       component_name=request_data['component_name'],
                       role=request_data['role'],
                       party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    all_metric_data = {}
    if metrics:
        for metric_namespace, metric_names in metrics.items():
            all_metric_data[metric_namespace] = all_metric_data.get(
                metric_namespace, {})
            for metric_name in metric_names:
                all_metric_data[metric_namespace][
                    metric_name] = all_metric_data[metric_namespace].get(
                        metric_name, {})
                metric_data, metric_meta = get_metric_all_data(
                    tracker=tracker,
                    metric_namespace=metric_namespace,
                    metric_name=metric_name)
                all_metric_data[metric_namespace][metric_name][
                    'data'] = metric_data
                all_metric_data[metric_namespace][metric_name][
                    'meta'] = metric_meta
        return get_json_result(retcode=0,
                               retmsg='success',
                               data=all_metric_data)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
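The repeated .get() calls above only build a two-level dict of {'data': ..., 'meta': ...} entries; a behaviour-equivalent sketch using collections.defaultdict (an alternative, not the original code):

from collections import defaultdict

# Sketch: same accumulation as component_metric_all(), with defaultdict instead of .get() chains
all_metric_data = defaultdict(dict)
for metric_namespace, metric_names in (metrics or {}).items():
    for metric_name in metric_names:
        metric_data, metric_meta = get_metric_all_data(tracker=tracker,
                                                       metric_namespace=metric_namespace,
                                                       metric_name=metric_name)
        all_metric_data[metric_namespace][metric_name] = {'data': metric_data, 'meta': metric_meta}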
Example #3
    def update_job_status(job_id, role, party_id, job_info, create=False):
        job_info['f_run_ip'] = RuntimeConfig.JOB_SERVER_HOST
        if create:
            dsl = json_loads(job_info['f_dsl'])
            runtime_conf = json_loads(job_info['f_runtime_conf'])
            train_runtime_conf = json_loads(job_info['f_train_runtime_conf'])
            if USE_AUTHENTICATION:
                authentication_check(src_role=job_info.get('src_role', None), src_party_id=job_info.get('src_party_id', None),
                                     dsl=dsl, runtime_conf=runtime_conf, role=role, party_id=party_id)
            save_job_conf(job_id=job_id,
                          job_dsl=dsl,
                          job_runtime_conf=runtime_conf,
                          train_runtime_conf=train_runtime_conf,
                          pipeline_dsl=None)

            job_parameters = runtime_conf['job_parameters']
            job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id,
                                   model_id=job_parameters["model_id"],
                                   model_version=job_parameters["model_version"])
            if job_parameters.get("job_type", "") != "predict":
                job_tracker.init_pipelined_model()
            roles = json_loads(job_info['f_roles'])
            partner = {}
            show_role = {}
            is_initiator = job_info.get('f_is_initiator', 0)
            for _role, _role_party in roles.items():
                if is_initiator or _role == role:
                    show_role[_role] = show_role.get(_role, [])
                    for _party_id in _role_party:
                        if is_initiator or _party_id == party_id:
                            show_role[_role].append(_party_id)

                if _role != role:
                    partner[_role] = partner.get(_role, [])
                    partner[_role].extend(_role_party)
                else:
                    for _party_id in _role_party:
                        if _party_id != party_id:
                            partner[_role] = partner.get(_role, [])
                            partner[_role].append(_party_id)

            dag = get_job_dsl_parser(dsl=dsl,
                                     runtime_conf=runtime_conf,
                                     train_runtime_conf=train_runtime_conf)
            job_args = dag.get_args_input()
            dataset = {}
            for _role, _role_party_args in job_args.items():
                if is_initiator or _role == role:
                    for _party_index in range(len(_role_party_args)):
                        _party_id = roles[_role][_party_index]
                        if is_initiator or _party_id == party_id:
                            dataset[_role] = dataset.get(_role, {})
                            dataset[_role][_party_id] = dataset[_role].get(_party_id, {})
                            for _data_type, _data_location in _role_party_args[_party_index]['args']['data'].items():
                                dataset[_role][_party_id][_data_type] = '{}.{}'.format(_data_location['namespace'],
                                                                                       _data_location['name'])
            job_tracker.log_job_view({'partner': partner, 'dataset': dataset, 'roles': show_role})
        else:
            job_tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
        job_tracker.save_job_info(role=role, party_id=party_id, job_info=job_info, create=create)
Example #4
def component_output_model():
    request_data = request.json
    check_request_parameters(request_data)
    job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=request_data['job_id'],
                                                                                    role=request_data['role'],
                                                                                    party_id=request_data['party_id'])
    model_id = job_runtime_conf['job_parameters']['model_id']
    model_version = job_runtime_conf['job_parameters']['model_version']
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'], model_id=model_id,
                       model_version=model_version)
    dag = job_utils.get_job_dsl_parser(dsl=job_dsl, runtime_conf=job_runtime_conf,
                                       train_runtime_conf=train_runtime_conf)
    component = dag.get_component_info(request_data['component_name'])
    output_model_json = {}
    # There is only one model output at the current dsl version.
    output_model = tracker.get_output_model(component.get_output()['model'][0] if component.get_output().get('model') else 'default')
    for buffer_name, buffer_object in output_model.items():
        if buffer_name.endswith('Param'):
            output_model_json = json_format.MessageToDict(buffer_object, including_default_value_fields=True)
    if output_model_json:
        component_define = tracker.get_component_define()
        this_component_model_meta = {}
        for buffer_name, buffer_object in output_model.items():
            if buffer_name.endswith('Meta'):
                this_component_model_meta['meta_data'] = json_format.MessageToDict(buffer_object,
                                                                                   including_default_value_fields=True)
        this_component_model_meta.update(component_define)
        return get_json_result(retcode=0, retmsg='success', data=output_model_json, meta=this_component_model_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example #5
def job_view():
    request_data = request.json
    check_request_parameters(request_data)
    job_tracker = Tracking(job_id=request_data['job_id'],
                           role=request_data['role'],
                           party_id=request_data['party_id'])
    job_view_data = job_tracker.get_job_view()
    if job_view_data:
        job_metric_list = job_tracker.get_metric_list(job_level=True)
        job_view_data['model_summary'] = {}
        for metric_namespace, namespace_metrics in job_metric_list.items():
            job_view_data['model_summary'][metric_namespace] = job_view_data[
                'model_summary'].get(metric_namespace, {})
            for metric_name in namespace_metrics:
                job_view_data['model_summary'][metric_namespace][
                    metric_name] = job_view_data['model_summary'][
                        metric_namespace].get(metric_name, {})
                for metric_data in job_tracker.get_job_metric_data(
                        metric_namespace=metric_namespace,
                        metric_name=metric_name):
                    job_view_data['model_summary'][metric_namespace][
                        metric_name][metric_data.key] = metric_data.value
        return get_json_result(retcode=0, retmsg='success', data=job_view_data)
    else:
        return get_json_result(retcode=101, retmsg='error')
Example #6
def save_metric_meta(job_id, component_name, task_id, role, party_id):
    request_data = request.json
    tracker = Tracking(job_id=job_id, component_name=component_name, task_id=task_id, role=role, party_id=party_id)
    metric_meta = deserialize_b64(request_data['metric_meta'])
    tracker.save_metric_meta(metric_namespace=request_data['metric_namespace'], metric_name=request_data['metric_name'],
                             metric_meta=metric_meta, job_level=request_data['job_level'])
    return get_json_result()
Example #7
def component_metrics():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'])
    metrics = tracker.get_metric_list()
    if metrics:
        return get_json_result(retcode=0, retmsg='success', data=metrics)
    else:
        return get_json_result(retcode=0, retmsg='no data', data={})
Example #8
 def update_task_status(job_id, component_name, task_id, role, party_id,
                        task_info):
     tracker = Tracking(job_id=job_id,
                        role=role,
                        party_id=party_id,
                        component_name=component_name,
                        task_id=task_id)
     tracker.save_task(role=role, party_id=party_id, task_info=task_info)
     schedule_logger(job_id).info(
         'job {} component {} {} {} status {}'.format(
             job_id, component_name, role, party_id,
             task_info.get('f_status', '')))
Example #9
def job_quantity_constraint(job_id, role, party_id, job_info):
    lock = Lock()
    with lock:
        time.sleep(1)
        if RuntimeConfig.WORK_MODE == WorkMode.CLUSTER:
            if role == LIMIT_ROLE:
                running_jobs = job_utils.query_job(status='running', role=role)
                ready_jobs = job_utils.query_job(tag='ready', role=role)
                if len(running_jobs)+len(ready_jobs) >= MAX_CONCURRENT_JOB_RUN_HOST:
                    return False
                else:
                    tracker = Tracking(job_id=job_id, role=role, party_id=party_id)
                    tracker.save_job_info(role=role, party_id=party_id, job_info={'f_tag': 'ready'})
        return True
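Note that Lock() above is created inside the function, so every call acquires its own lock and the with block does not actually serialize concurrent submissions. If real mutual exclusion is wanted, the lock has to be shared; a minimal sketch of that variant (an assumption, not the original code):

import threading

# Sketch: one module-level lock shared by all callers instead of a per-call Lock()
_job_quantity_lock = threading.Lock()

def job_quantity_constraint(job_id, role, party_id, job_info):
    with _job_quantity_lock:
        ...  # same body as above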
Example #10
def get_component_output_data_table(task_data):
    check_request_parameters(task_data)
    tracker = Tracking(job_id=task_data['job_id'], component_name=task_data['component_name'],
                       role=task_data['role'], party_id=task_data['party_id'])
    job_dsl_parser = job_utils.get_job_dsl_parser_by_job_id(job_id=task_data['job_id'])
    if not job_dsl_parser:
        raise Exception('can not get dag parser, please check if the parameters are correct')
    component = job_dsl_parser.get_component_info(task_data['component_name'])
    if not component:
        raise Exception('can not found component, please check if the parameters are correct')
    output_dsl = component.get_output()
    output_data_dsl = output_dsl.get('data', [])
    # The current version will only have one data output.
    output_data_table = tracker.get_output_data_table(output_data_dsl[0] if output_data_dsl else 'component')
    return output_data_table
Example #11
    def test_upsample(self):
        sampler = RandomSampler(fraction=3, method="upsample")
        tracker = Tracking("jobid", "guest", 9999, "abc", "123")
        sampler.set_tracker(tracker)
        sample_data, sample_ids = sampler.sample(self.table)

        self.assertTrue(sample_data.count() > 250
                        and sample_data.count() < 350)

        data_dict = dict(self.data)
        new_data = list(sample_data.collect())
        for id, value in new_data:
            self.assertTrue(
                np.abs(value - data_dict[sample_ids[id]]) < consts.FLOAT_ZERO)

        trans_sampler = RandomSampler(method="upsample")
        trans_sampler.set_tracker(tracker)
        trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
        trans_data = list(trans_sample_data.collect())
        data_to_trans_dict = dict(self.data_to_trans)

        self.assertTrue(len(trans_data) == len(sample_ids))
        for id, value in trans_data:
            self.assertTrue(
                np.abs(value -
                       data_to_trans_dict[sample_ids[id]]) < consts.FLOAT_ZERO)
Example #12
    def test_downsample(self):
        sampler = RandomSampler(fraction=0.3, method="downsample")
        tracker = Tracking("jobid", "guest", 9999, "abc", "123")
        sampler.set_tracker(tracker)
        sample_data, sample_ids = sampler.sample(self.table)

        self.assertTrue(sample_data.count() > 25 and sample_data.count() < 35)
        self.assertTrue(len(set(sample_ids)) == len(sample_ids))

        new_data = list(sample_data.collect())
        data_dict = dict(self.data)
        for id, value in new_data:
            self.assertTrue(id in data_dict)
            self.assertTrue(
                np.abs(value - data_dict.get(id)) < consts.FLOAT_ZERO)

        trans_sampler = RandomSampler(method="downsample")
        trans_sampler.set_tracker(tracker)
        trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
        trans_data = list(trans_sample_data.collect())
        trans_sample_ids = [id for (id, value) in trans_data]
        data_to_trans_dict = dict(self.data_to_trans)
        sample_id_mapping = dict(zip(sample_ids, range(len(sample_ids))))

        self.assertTrue(len(trans_data) == len(sample_ids))
        self.assertTrue(set(trans_sample_ids) == set(sample_ids))

        for id, value in trans_data:
            self.assertTrue(id in sample_id_mapping)
            self.assertTrue(
                np.abs(value - data_to_trans_dict.get(id)) < consts.FLOAT_ZERO)
Example #13
    def setUp(self):
        self.data = []
        self.data_with_value = []
        for i in range(100):
            row = []
            row_with_value = []
            for j in range(100):
                if random.randint(1, 100) > 30:
                    continue
                str_r = ''.join(
                    random.sample(string.ascii_letters + string.digits, 10))
                row.append(str_r)
                row_with_value.append(str_r + ':' + str(random.random()))

            self.data.append((i, ' '.join(row)))
            self.data_with_value.append((i, ' '.join(row_with_value)))

        self.table1 = session.parallelize(self.data,
                                          include_key=True,
                                          partition=16)
        self.table2 = session.parallelize(self.data_with_value,
                                          include_key=True,
                                          partition=16)
        self.args1 = {"data": {"data_io_0": {"data": self.table1}}}
        self.args2 = {"data": {"data_io_1": {"data": self.table2}}}

        self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")
Example #14
    def setUp(self):
        self.data = []
        self.max_feature = -1
        for i in range(100):
            row = []
            label = i % 2
            row.append(str(label))
            dict = {}

            for j in range(20):
                x = random.randint(0, 1000)
                val = random.random()
                if x in dict:
                    continue
                self.max_feature = max(self.max_feature, x)
                dict[x] = True
                row.append(":".join(map(str, [x, val])))

            self.data.append((i, " ".join(row)))

        self.table = session.parallelize(self.data,
                                         include_key=True,
                                         partition=16)
        self.args = {"data": {"data_io_0": {"data": self.table}}}

        self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")
Example #15
    def test_downsample(self):
        fractions = [(0, 0.3), (1, 0.4), (2, 0.5), (3, 0.8)]
        sampler = StratifiedSampler(fractions=fractions, method="downsample")
        tracker = Tracking("jobid", "guest", 9999, "abc", "123")
        sampler.set_tracker(tracker)
        sample_data, sample_ids = sampler.sample(self.table)
        count_label = [0 for i in range(4)]
        new_data = list(sample_data.collect())
        data_dict = dict(self.data)
        self.assertTrue(
            set(sample_ids) & set(data_dict.keys()) == set(sample_ids))

        for id, inst in new_data:
            count_label[inst.label] += 1
            self.assertTrue(
                type(id).__name__ == 'int' and id >= 0 and id < 1000)
            self.assertTrue(inst.label == self.data[id][1].label
                            and inst.features == self.data[id][1].features)

        for i in range(4):
            self.assertTrue(
                np.abs(count_label[i] - 250 * fractions[i][1]) < 10)

        trans_sampler = StratifiedSampler(method="downsample")
        trans_sampler.set_tracker(tracker)
        trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
        trans_data = list(trans_sample_data.collect())
        trans_sample_ids = [id for (id, value) in trans_data]
        data_to_trans_dict = dict(self.data_to_trans)

        self.assertTrue(set(trans_sample_ids) == set(sample_ids))
        for id, inst in trans_data:
            self.assertTrue(
                inst.features == data_to_trans_dict.get(id).features)
Example #16
def component_metric_data():
    request_data = request.json
    check_request_parameters(request_data)
    tracker = Tracking(job_id=request_data['job_id'], component_name=request_data['component_name'],
                       role=request_data['role'], party_id=request_data['party_id'])
    metric_data, metric_meta = get_metric_all_data(tracker=tracker, metric_namespace=request_data['metric_namespace'],
                                                   metric_name=request_data['metric_name'])
    if metric_data or metric_meta:
        return get_json_result(retcode=0, retmsg='success', data=metric_data,
                               meta=metric_meta)
    else:
        return get_json_result(retcode=0, retmsg='no data', data=[], meta={})
Example #17
 def clean_job(job_id, role, party_id, roles, party_ids):
     schedule_logger(job_id).info('job {} on {} {} start to clean'.format(job_id, role, party_id))
     tasks = job_utils.query_task(job_id=job_id, role=role, party_id=party_id)
     for task in tasks:
         try:
             Tracking(job_id=job_id, role=role, party_id=party_id, task_id=task.f_task_id).clean_task(roles, party_ids)
             schedule_logger(job_id).info(
                 'job {} component {} on {} {} clean done'.format(job_id, task.f_component_name, role, party_id))
         except Exception as e:
             schedule_logger(job_id).info(
                 'job {} component {} on {} {} clean failed'.format(job_id, task.f_component_name, role, party_id))
             schedule_logger(job_id).exception(e)
     schedule_logger(job_id).info('job {} on {} {} clean done'.format(job_id, role, party_id))
Example #18
    def setUp(self):
        name1 = "dense_data_" + str(random.random())
        name2 = "dense_data_" + str(random.random())
        namespace = "data_io_dense_test"
        data1 = [("a", "1,2,-1,0,0,5"), ("b", "4,5,6,0,1,2")]
        schema = {"header": "x1,x2,x3,x4,x5,x6", "sid": "id"}
        table1 = session.parallelize(data1, include_key=True, partition=16)
        table1.save_as(name1, namespace)
        session.save_data_table_meta(schema, name1, namespace)
        self.table1 = session.table(name1, namespace)

        data2 = [("a", '-1,,na,null,null,2')]
        table2 = session.parallelize(data2, include_key=True, partition=16)
        table2.save_as(name2, namespace)
        session.save_data_table_meta(schema, name2, namespace)
        self.table2 = session.table(name2, namespace)

        self.args1 = {"data": {"data_io_0": {"data": self.table1}}}
        self.args2 = {"data": {"data_io_1": {"data": self.table2}}}
        self.tracker = Tracking("jobid", "guest", 9999, "abc", "123")
Example #19
    def test_upsample(self):
        fractions = [(0, 1.3), (1, 0.5), (2, 0.8), (3, 9)]
        sampler = StratifiedSampler(fractions=fractions, method="upsample")
        tracker = Tracking("jobid", "guest", 9999, "abc", "123")
        sampler.set_tracker(tracker)
        sample_data, sample_ids = sampler.sample(self.table)
        new_data = list(sample_data.collect())
        count_label = [0 for i in range(4)]
        data_dict = dict(self.data)

        for id, inst in new_data:
            count_label[inst.label] += 1
            self.assertTrue(
                type(id).__name__ == 'int' and id >= 0
                and id < len(sample_ids))
            real_id = sample_ids[id]
            self.assertTrue(
                inst.label == self.data[real_id][1].label
                and inst.features == self.data[real_id][1].features)

        for i in range(4):
            self.assertTrue(
                np.abs(count_label[i] - 250 * fractions[i][1]) < 10)

        trans_sampler = StratifiedSampler(method="upsample")
        trans_sampler.set_tracker(tracker)
        trans_sample_data = trans_sampler.sample(self.table_trans, sample_ids)
        trans_data = list(trans_sample_data.collect())  # materialize: the result is iterated twice below
        trans_sample_ids = [id for (id, value) in trans_data]
        data_to_trans_dict = dict(self.data_to_trans)

        self.assertTrue(
            sorted(trans_sample_ids) == list(range(len(sample_ids))))
        for id, inst in trans_data:
            real_id = sample_ids[id]
            self.assertTrue(
                inst.features == data_to_trans_dict[real_id][1].features)
Example #20
    def run_task():
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
            parser.add_argument('-n', '--component_name', required=True, type=str,
                                help="component name")
            parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
            parser.add_argument('-r', '--role', required=True, type=str, help="role")
            parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
            parser.add_argument('-c', '--config', required=True, type=str, help="task config")
            parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger(args.job_id).info('enter task process')
            schedule_logger(args.job_id).info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
                RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            party_id = int(args.party_id)
            executor_pid = os.getpid()
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
            task_parameters = task_config['task_parameters']
            module_name = task_config['module_name']
            TaskExecutor.monkey_patch()
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True, force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               component_module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = executor_pid
            run_class_paths = component_parameters.get('CodePath').split('/')
            run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
            run_class_name = run_class_paths[-1]
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None),
                                          task_info=task.to_json())

            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                      BACKEND=job_parameters.get('backend', 0))
            if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
                session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
            else:
                session_options = {}
            session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                         mode=RuntimeConfig.WORK_MODE,
                         backend=RuntimeConfig.BACKEND,
                         options=session_options)
            federation.init(job_id=task_id, runtime_conf=component_parameters)

            schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
            schedule_logger().info(component_parameters)
            schedule_logger().info(task_input_dsl)
            task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                           task_id=task_id,
                                                           job_args=job_args,
                                                           job_parameters=job_parameters,
                                                           task_parameters=task_parameters,
                                                           input_dsl=task_input_dsl,
                                                           if_save_as_task_input_data=job_parameters.get("save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH)
                                                           )
            run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            run_object.run(component_parameters, task_run_args)
            output_data = run_object.save_data()
            tracker.save_output_data_table(output_data, task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
            output_model = run_object.export_model()
            # There is only one model output at the current dsl version.
            tracker.save_output_model(output_model, task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
            task.f_status = TaskStatus.COMPLETE
        except Exception as e:
            task.f_status = TaskStatus.FAILED
            schedule_logger().exception(e)
        finally:
            sync_success = False
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                              party_id=party_id,
                                              initiator_party_id=job_initiator.get('party_id', None),
                                              initiator_role=job_initiator.get('role', None),
                                              task_info=task.to_json())
                sync_success = True
            except Exception as e:
                traceback.print_exc()
                schedule_logger().exception(e)
        schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_end_time)))
        schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id, int(task.f_elapsed)/1000))
        schedule_logger().info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))

        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))
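run_task() is driven entirely by the argparse flags defined at its top, so it is normally started as a separate executor process. A hypothetical launch built from those flags (script name, ids, and paths are placeholders):

import subprocess
import sys

# Sketch: spawning the task executor with the flags run_task() parses
cmd = [sys.executable, 'task_executor.py',               # module path assumed
       '-j', '20200101000000000000', '-n', 'dataio_0',
       '-t', '20200101000000000000_dataio_0', '-r', 'guest',
       '-p', '9999', '-c', '/path/to/task_config.json',
       '--job_server', '127.0.0.1:9380']                  # optional, see args.job_server above
subprocess.Popen(cmd)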
Example #21
    def get_task_run_args(job_id, role, party_id, task_id, job_args, job_parameters, task_parameters, input_dsl,
                          if_save_as_task_input_data, filter_type=None, filter_attr=None):
        task_run_args = {}
        for input_type, input_detail in input_dsl.items():
            if filter_type and input_type not in filter_type:
                continue
            if input_type == 'data':
                this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
                for data_type, data_list in input_detail.items():
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        search_component_name, search_data_name = data_key_item[0], data_key_item[1]
                        if search_component_name == 'args':
                            if job_args.get('data', {}).get(search_data_name).get('namespace', '') and job_args.get(
                                    'data', {}).get(search_data_name).get('name', ''):

                                data_table = session.table(
                                    namespace=job_args['data'][search_data_name]['namespace'],
                                    name=job_args['data'][search_data_name]['name'])
                            else:
                                data_table = None
                        else:
                            data_table = Tracking(job_id=job_id, role=role, party_id=party_id,
                                                  component_name=search_component_name).get_output_data_table(
                                data_name=search_data_name)
                        args_from_component = this_type_args[search_component_name] = this_type_args.get(
                            search_component_name, {})
                        # todo: If the same component has more than one identical input, save as is repeated
                        if if_save_as_task_input_data:
                            if data_table:
                                schedule_logger().info("start save as task {} input data table {} {}".format(
                                    task_id,
                                    data_table.get_namespace(),
                                    data_table.get_name()))
                                origin_table_metas = data_table.get_metas()
                                origin_table_schema = data_table.schema
                                save_as_options = {"store_type": StoreTypes.ROLLPAIR_IN_MEMORY} if SAVE_AS_TASK_INPUT_DATA_IN_MEMORY else {}
                                data_table = data_table.save_as(
                                    namespace=job_utils.generate_session_id(task_id=task_id,
                                                                            role=role,
                                                                            party_id=party_id),
                                    name=data_table.get_name(),
                                    partition=task_parameters['input_data_partition'] if task_parameters.get('input_data_partition', 0) > 0 else data_table.get_partitions(),
                                    options=save_as_options)
                                data_table.save_metas(origin_table_metas)
                                data_table.schema = origin_table_schema
                                schedule_logger().info("save as task {} input data table to {} {} done".format(
                                    task_id,
                                    data_table.get_namespace(),
                                    data_table.get_name()))
                            else:
                                schedule_logger().info("pass save as task {} input data table, because the table is none".format(task_id))
                        else:
                            schedule_logger().info("pass save as task {} input data table, because the switch is off".format(task_id))
                        if not data_table or not filter_attr or not filter_attr.get("data", None):
                            args_from_component[data_type] = data_table
                        else:
                            args_from_component[data_type] = dict([(a, getattr(data_table, "get_{}".format(a))()) for a in filter_attr["data"]])
            elif input_type in ['model', 'isometric_model']:
                this_type_args = task_run_args[input_type] = task_run_args.get(input_type, {})
                for dsl_model_key in input_detail:
                    dsl_model_key_items = dsl_model_key.split('.')
                    if len(dsl_model_key_items) == 2:
                        search_component_name, search_model_alias = dsl_model_key_items[0], dsl_model_key_items[1]
                    elif len(dsl_model_key_items) == 3 and dsl_model_key_items[0] == 'pipeline':
                        search_component_name, search_model_alias = dsl_model_key_items[1], dsl_model_key_items[2]
                    else:
                        raise Exception('get input {} failed'.format(input_type))
                    models = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=search_component_name,
                                      model_id=job_parameters['model_id'],
                                      model_version=job_parameters['model_version']).get_output_model(
                        model_alias=search_model_alias)
                    this_type_args[search_component_name] = models
        return task_run_args
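The parsing above expects each input_dsl entry to be dotted: 'args.<data_key>', '<component>.<data_name>', or, for models, 'pipeline.<component>.<model_alias>'. A hypothetical input_dsl exercising each branch (component and key names are illustrative only):

# Sketch: an input_dsl shaped the way get_task_run_args() reads it
input_dsl = {
    'data': {
        'data': ['args.train_data',   # looked up in job_args['data']['train_data']
                 'dataio_0.train']    # looked up via Tracking(...).get_output_data_table('train')
    },
    'model': ['hetero_lr_0.model'],                                   # '<component>.<model_alias>'
    'isometric_model': ['pipeline.hetero_feature_binning_0.model']    # 'pipeline.<component>.<alias>'
}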
Example #22
    def submit_job(job_data, job_id=None):
        if not job_id:
            job_id = generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
        job_parameters = job_runtime_conf['job_parameters']
        job_initiator = job_runtime_conf['initiator']
        job_type = job_parameters.get('job_type', '')
        if job_type != 'predict':
            # generate job model info
            job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
            job_parameters['model_version'] = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                                   model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
            pipeline_model = job_tracker.get_output_model('pipeline')
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
        path_dict = save_job_conf(job_id=job_id,
                                  job_dsl=job_dsl,
                                  job_runtime_conf=job_runtime_conf,
                                  train_runtime_conf=train_runtime_conf,
                                  pipeline_dsl=None)

        job = Job()
        job.f_job_id = job_id
        job.f_roles = json_dumps(job_runtime_conf['role'])
        job.f_work_mode = job_parameters['work_mode']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_dsl = json_dumps(job_dsl)
        job.f_runtime_conf = json_dumps(job_runtime_conf)
        job.f_train_runtime_conf = json_dumps(train_runtime_conf)
        job.f_run_ip = ''
        job.f_status = JobStatus.WAITING
        job.f_progress = 0
        job.f_create_time = current_timestamp()

        initiator_role = job_initiator['role']
        initiator_party_id = job_initiator['party_id']
        if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
            schedule_logger(job_id).info("initiator party id error:{}".format(initiator_party_id))
            raise Exception("initiator party id error {}".format(initiator_party_id))

        get_job_dsl_parser(dsl=job_dsl,
                           runtime_conf=job_runtime_conf,
                           train_runtime_conf=train_runtime_conf)

        TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)

        # push into queue
        job_event = job_utils.job_event(job_id, initiator_role,  initiator_party_id)
        try:
            RuntimeConfig.JOB_QUEUE.put_event(job_event)
        except Exception as e:
            raise Exception('push job into queue failed')

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
        board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
        logs_directory = get_job_log_directory(job_id)
        return job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory, \
               {'model_id': job_parameters['model_id'], 'model_version': job_parameters['model_version']}, board_url
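submit_job() only touches a few top-level keys of job_runtime_conf; a hypothetical minimal configuration covering the fields read in the code above (party ids and values are placeholders):

# Sketch: the job_runtime_conf fields that submit_job() actually reads
job_runtime_conf = {
    'initiator': {'role': 'guest', 'party_id': 9999},
    'role': {'guest': [9999], 'host': [10000], 'arbiter': [10000]},
    'job_parameters': {
        'work_mode': 1,            # copied to job.f_work_mode
        # for job_type 'predict', 'model_id' and 'model_version' are also required
    }
}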