Example #1
 def run_job(self, job_id, config):
     default_runtime_dict = file_utils.load_json_conf('workflow/conf/default_runtime_conf.json')
     setting_conf = file_utils.load_json_conf('workflow/conf/setting_conf.json')
     _job_dir = get_job_directory(job_id=job_id)
     os.makedirs(_job_dir, exist_ok=True)
     ParameterOverride.override_parameter(default_runtime_dict, setting_conf, config, _job_dir)
     logger.info('job_id {} parameters overrode {}'.format(job_id, _job_dir))
     channel, stub = get_proxy_data_channel()
     for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'), recursive=True):
         runtime_conf = file_utils.load_json_conf(os.path.abspath(runtime_conf_path))
         _role = runtime_conf['local']['role']
         _party_id = runtime_conf['local']['party_id']
         _method = 'POST'
         _module = runtime_conf['module']
         _url = '/workflow/{}/{}/{}'.format(job_id, _module, _role)
         _packet = wrap_grpc_packet(runtime_conf, _method, _url, _party_id, job_id)
         logger.info(
             'Starting workflow job_id:{} party_id:{} role:{} method:{} url:{}'.format(job_id, _party_id,
                                                                                       _role, _method,
                                                                                       _url))
         try:
             _return = stub.unaryCall(_packet)
             logger.info("Grpc unary response: {}".format(_return))
         except grpc.RpcError as e:
             msg = 'job_id:{} party_id:{} role:{} method:{} url:{} Failed to start workflow'.format(job_id,
                                                                                                    _party_id,
                                                                                                    _role, _method,
                                                                                                    _url)
             logger.exception(msg)
             return get_json_result(-101, 'UnaryCall submit to remote manager failed')
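Note: every example on this page goes through file_utils.load_json_conf. A minimal stand-in sketch is shown below, assuming the helper resolves a relative path against a project base directory and parses the file as JSON; this is an illustration only, not the actual FATE implementation.

import json
import os


def load_json_conf(conf_path, project_base=None):
    # Hypothetical stand-in for file_utils.load_json_conf: resolve relative
    # paths against project_base (defaults to the current working directory)
    # and parse the file as JSON.
    if project_base is None:
        project_base = os.getcwd()
    if not os.path.isabs(conf_path):
        conf_path = os.path.join(project_base, conf_path)
    with open(conf_path) as f:
        return json.load(f)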
Example #2
    def init():
        if EggRoll.init_flag:
            return
        config = file_utils.load_json_conf('arch/conf/mock_roll.json')
        egg_ids = config.get('eggs')

        for egg_id in egg_ids:
            target = config.get('storage').get(egg_id)
            channel = grpc.insecure_channel(
                target,
                options=[('grpc.max_send_message_length', -1),
                         ('grpc.max_receive_message_length', -1)])
            EggRoll.egg_list.append(kv_pb2_grpc.KVServiceStub(channel))
            procs = config.get('procs').get(egg_id)
            for proc in procs:
                _channel = grpc.insecure_channel(
                    proc,
                    options=[('grpc.max_send_message_length', -1),
                             ('grpc.max_receive_message_length', -1)])
                _stub = processor_pb2_grpc.ProcessServiceStub(_channel)
                proc_info = (_channel, _stub)
                i = len(EggRoll.proc_list)
                EggRoll.proc_egg_map[i] = int(egg_id) - 1
                EggRoll.proc_list.append(proc_info)
        EggRoll.init_flag = True
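The keys read in init() above ('eggs', 'storage', 'procs') suggest a mock_roll.json shaped roughly like the sketch below; the egg id and addresses are placeholders, not the real configuration.

# Hypothetical mock_roll.json contents, inferred from the keys read in init();
# all addresses are placeholders.
mock_roll_conf = {
    "eggs": ["1"],
    "storage": {"1": "127.0.0.1:7778"},
    "procs": {"1": ["127.0.0.1:7888", "127.0.0.1:7889"]}
}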
Example #3
def init_roll_site_context(runtime_conf, session_id):
    from eggroll.roll_site.roll_site import RollSiteContext
    from eggroll.roll_pair.roll_pair import RollPairContext
    LOGGER.info("init_roll_site_context runtime_conf: {}".format(runtime_conf))
    session_instance = FateSession.get_instance()._eggroll.get_session()
    rp_context = RollPairContext(session_instance)

    role = runtime_conf.get("local").get("role")
    party_id = str(runtime_conf.get("local").get("party_id"))
    _path = file_utils.get_project_base_directory(
    ) + "/arch/conf/server_conf.json"

    server_conf = file_utils.load_json_conf(_path)
    host = server_conf.get('servers').get('proxy').get("host")
    port = server_conf.get('servers').get('proxy').get("port")

    options = {
        'self_role': role,
        'self_party_id': party_id,
        'proxy_endpoint': ErEndpoint(host, int(port))
    }

    rs_context = RollSiteContext(session_id,
                                 rp_ctx=rp_context,
                                 options=options)
    LOGGER.info("init_roll_site_context done: {}".format(rs_context.__dict__))
    return rp_context, rs_context
Example #4
def call_back(status):
    global job_id
    global role
    global party_id
    global LOCAL_URL

    if job_id is None:
        parser = argparse.ArgumentParser()
        parser.add_argument('-j',
                            '--job_id',
                            type=str,
                            required=True,
                            help="Specify the jobid")
        parser.add_argument('-c',
                            '--config',
                            required=True,
                            type=str,
                            help="Specify a config json file path")

        args = parser.parse_args()
        job_id = args.job_id
        config = file_utils.load_json_conf(args.config)
        role = config.get('local', {}).get('role')
        party_id = config.get('local', {}).get('party_id')

    try:
        requests.post("/".join(
            [LOCAL_URL, str(job_id),
             str(role), str(party_id)]),
                      json={"status": status})
    except Exception:
        LOGGER.exception("failed to post status {}".format(status))
Example #5
    def build_federation(self, federation_id, runtime_conf, server_conf_path):
        if self._work_mode.is_standalone():
            from arch.api.impl.based_1x.federation_standalone import FederationRuntime
            return FederationRuntime(session_id=federation_id,
                                     runtime_conf=runtime_conf)

        elif self._work_mode.is_cluster():
            from arch.api.impl.based_1x.federation_cluster import FederationRuntime
            server_conf = file_utils.load_json_conf(server_conf_path)
            if CONF_KEY_SERVER not in server_conf:
                raise EnvironmentError(
                    "server_conf should contain key {}".format(
                        CONF_KEY_SERVER))
            if CONF_KEY_FEDERATION not in server_conf.get(CONF_KEY_SERVER):
                raise EnvironmentError(
                    "The {} should be a json file containing key: {}".format(
                        server_conf_path, CONF_KEY_FEDERATION))
            host = server_conf.get(CONF_KEY_SERVER).get(
                CONF_KEY_FEDERATION).get("host")
            port = server_conf.get(CONF_KEY_SERVER).get(
                CONF_KEY_FEDERATION).get("port")
            return FederationRuntime(session_id=federation_id,
                                     runtime_conf=runtime_conf,
                                     host=host,
                                     port=port)
Example #6
 def __init__(self, job_id, party_id, role, runtime_conf):
     self.trans_conf = file_utils.load_json_conf('federatedml/transfer_variable_conf/transfer_conf.json')
     self.job_id = job_id
     self.party_id = party_id
     self.role = role
     self.runtime_conf = runtime_conf
     self._loop = asyncio.get_event_loop()
     FederationRuntime.instance = self
Example #7
def init(job_id=None, server_conf_path="arch/conf/server_conf.json"):
    if job_id is None:
        job_id = str(uuid.uuid1())
    global LOGGER
    LOGGER = getLogger()
    server_conf = file_utils.load_json_conf(server_conf_path)
    _roll_host = server_conf.get("servers").get("roll").get("host")
    _roll_port = server_conf.get("servers").get("roll").get("port")
    _EggRoll(job_id, _roll_host, _roll_port)
Example #8
 def run_job(self, job_id, config):
     default_runtime_dict = file_utils.load_json_conf(
         'workflow/conf/default_runtime_conf.json')
     setting_conf = file_utils.load_json_conf(
         'workflow/conf/setting_conf.json')
     _job_dir = get_job_directory(job_id=job_id)
     os.makedirs(_job_dir, exist_ok=True)
     ParameterOverride.override_parameter(default_runtime_dict,
                                          setting_conf, config, _job_dir)
     logger.info('job_id {} parameters overrode {}'.format(
         job_id, _job_dir))
     run_job_success = True
     job_param = dict()
     job_param['job_id'] = job_id
     job_param['initiator'] = PARTY_ID
     for runtime_conf_path in glob.glob(os.path.join(
             _job_dir, '**', 'runtime_conf.json'),
                                        recursive=True):
         runtime_conf = file_utils.load_json_conf(
             os.path.abspath(runtime_conf_path))
         runtime_conf['JobParam'] = job_param
         _role = runtime_conf['local']['role']
         _party_id = runtime_conf['local']['party_id']
         _module = runtime_conf['module']
         st, msg = federated_api(job_id=job_id,
                                 method='POST',
                                 url='/workflow/{}/{}/{}'.format(
                                     job_id, _module, _role),
                                 party_id=_party_id,
                                 json_body=runtime_conf)
         if st == 0:
             save_job_info(job_id=job_id,
                           role=_role,
                           party_id=_party_id,
                           save_info={
                               "status": "ready",
                               "initiator": PARTY_ID
                           },
                           create=True)
         else:
             run_job_success = False
     logger.info("run job done")
     return run_job_success
Example #9
def query_model_version_history():
    request_data = request.json
    try:
        config = file_utils.load_json_conf(request_data.get("config_path"))
        eggroll.init(mode=WORK_MODE)
        history = version_history(data_table_namespace=config.get("namespace"))
        return get_json_result(msg=json.dumps(history))
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="load model error: %s" % e)
Example #10
def publish_model_online():
    request_data = request.json
    try:
        config = file_utils.load_json_conf(request_data.get("config_path"))
        if not config.get('servings'):
            # get my party all servings
            config['servings'] = SERVINGS
        publish_model.publish_online(config_data=config)
        return get_json_result()
    except Exception as e:
        logger.exception(e)
        return get_json_result(status=1, msg="publish model error: %s" % e)
Example #11
 def __init__(self, job_id, party_id, role, runtime_conf, host, port):
     self.trans_conf = file_utils.load_json_conf('federatedml/transfer_variable_conf/transfer_conf.json')
     self.job_id = job_id
     self.party_id = party_id
     self.role = role
     self.runtime_conf = runtime_conf
     self.channel = grpc.insecure_channel(
         target="{}:{}".format(host, port),
         options=[('grpc.max_send_message_length', -1), ('grpc.max_receive_message_length', -1)])
     self.stub = federation_pb2_grpc.TransferSubmitServiceStub(self.channel)
     self.__pool = concurrent.futures.ThreadPoolExecutor()
     FederationRuntime.instance = self
Example #12
    def __init__(self, transfer_conf_path):
        self.transfer_auth = {}
        for path, _, file_names in os.walk(
                os.path.join(file_utils.get_project_base_directory(),
                             transfer_conf_path)):
            for name in file_names:
                transfer_conf = os.path.join(path, name)
                if transfer_conf.endswith(".json"):
                    self.transfer_auth.update(
                        file_utils.load_json_conf(transfer_conf))

        # cache
        self._authorized_src = {}
        self._authorized_dst = {}
Example #13
def init(job_id, runtime_conf, server_conf_path):
    global LOGGER
    LOGGER = getLogger()
    server_conf = file_utils.load_json_conf(server_conf_path)
    if CONF_KEY_SERVER not in server_conf:
        raise EnvironmentError("server_conf should contain key {}".format(CONF_KEY_SERVER))
    if CONF_KEY_FEDERATION not in server_conf.get(CONF_KEY_SERVER):
        raise EnvironmentError(
            "The {} should be a json file containing key: {}".format(server_conf_path, CONF_KEY_FEDERATION))
    _host = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get("host")
    _port = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get("port")
    if CONF_KEY_LOCAL not in runtime_conf:
        raise EnvironmentError("runtime_conf should be a dict containing key: {}".format(CONF_KEY_LOCAL))
    _party_id = runtime_conf.get(CONF_KEY_LOCAL).get("party_id")
    _role = runtime_conf.get(CONF_KEY_LOCAL).get("role")
    return FederationRuntime(job_id, _party_id, _role, runtime_conf, _host, _port)
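A minimal runtime_conf sketch showing only the 'local' section that init() reads; real runtime configurations carry many more fields, and the values here are placeholders.

# Hypothetical runtime_conf fragment; only the keys read by init() are shown.
runtime_conf = {
    "local": {
        "role": "guest",
        "party_id": 9999
    }
}
# federation_runtime = init("example_job_id", runtime_conf, "arch/conf/server_conf.json")
# (running the call also requires a server_conf.json containing the federation host/port)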
Example #14
def start_workflow(job_id, module, role):
    _config = request.json
    _job_dir = get_job_directory(job_id)
    _party_id = str(_config['local']['party_id'])
    _method = _config['WorkFlowParam']['method']
    default_runtime_dict = file_utils.load_json_conf(
        'workflow/conf/default_runtime_conf.json')
    fill_runtime_conf_table_info(runtime_conf=_config,
                                 default_runtime_conf=default_runtime_dict)
    conf_file_path = new_runtime_conf(job_dir=_job_dir,
                                      method=_method,
                                      module=module,
                                      role=role,
                                      party_id=_party_id)
    with open(conf_file_path, 'w+') as f:
        f.truncate()
        f.write(json.dumps(_config, indent=4))
        f.flush()
    progs = [
        "python3",
        os.path.join(file_utils.get_project_base_directory(),
                     _config['CodePath']), "-j", job_id, "-c",
        os.path.abspath(conf_file_path)
    ]
    p = run_subprocess(job_dir=_job_dir, job_role=role, progs=progs)
    job_status = "start"
    job_data = dict()
    job_data["begin_date"] = datetime.datetime.now()
    job_data["status"] = job_status
    job_data.update(_config)
    job_data["pid"] = p.pid
    job_data["all_party"] = json.dumps(_config.get("role", {}))
    job_data["initiator"] = _config.get("JobParam", {}).get("initiator")
    save_job_info(job_id=job_id,
                  role=_config.get("local", {}).get("role"),
                  party_id=_config.get("local", {}).get("party_id"),
                  save_info=job_data,
                  create=True)
    update_job_queue(job_id=job_id,
                     role=role,
                     party_id=_party_id,
                     save_data={
                         "status": job_status,
                         "pid": p.pid
                     })
    return get_json_result(data={'pid': p.pid}, job_id=job_id)
Example #15
def init(job_id, runtime_conf, server_conf_path):
    server_conf = file_utils.load_json_conf(server_conf_path)
    if CONF_KEY_SERVER not in server_conf:
        raise EnvironmentError(
            "server_conf should contain key {}".format(CONF_KEY_SERVER))
    if CONF_KEY_FEDERATION not in server_conf.get(CONF_KEY_SERVER):
        raise EnvironmentError(
            "The {} should be a json file containing key: {}".format(
                server_conf_path, CONF_KEY_FEDERATION))
    _host = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get(
        "host")
    _port = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get(
        "port")

    federation_runtime = federation.init(job_id, runtime_conf,
                                         server_conf_path)
    return FateScript(federation_runtime, _host, _port)
Example #16
def stop_job(job_id):
    _job_dir = get_job_directory(job_id)
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**', 'runtime_conf.json'), recursive=True):
        runtime_conf = file_utils.load_json_conf(os.path.abspath(runtime_conf_path))
        _role = runtime_conf['local']['role']
        _party_id = runtime_conf['local']['party_id']
        _url = '/workflow/{}'.format(job_id)
        _method = 'DELETE'
        _packet = wrap_grpc_packet({}, _method, _url, _party_id, job_id)
        channel, stub = get_proxy_data_channel()
        try:
            _return = stub.unaryCall(_packet)
            logger.info("Grpc unary response: {}".format(_return))
        except grpc.RpcError as e:
            msg = 'job_id:{} party_id:{} role:{} method:{} url:{} Failed to stop workflow'.format(job_id,
                                                                                                   _party_id,
                                                                                                   _role, _method,
                                                                                                   _url)
            logger.exception(msg)
            return get_json_result(-101, 'UnaryCall stop request to remote manager failed')
    return get_json_result()
Example #17
def test_component(self, fun):
    job_id = os.listdir(os.path.abspath(os.path.join(
        self.success_job_dir)))[-1]
    job_info = file_utils.load_json_conf(
        os.path.abspath(os.path.join(self.success_job_dir, job_id)))
    data = {
        'job_id': job_id,
        'role': job_info['f_role'],
        'party_id': job_info['f_party_id'],
        'component_name': self.test_component_name
    }
    if 'download' in fun:
        response = requests.get("/".join([self.server_url, "tracking", fun]),
                                json=data,
                                stream=True)
        self.assertTrue(response.status_code in [200, 201])
    else:
        response = requests.post("/".join([self.server_url, 'tracking', fun]),
                                 json=data)
        self.assertTrue(response.status_code in [200, 201])
        self.assertTrue(int(response.json()['retcode']) == 0)
Example #18
def stop_job(job_id):
    _job_dir = get_job_directory(job_id)
    all_party = []
    for runtime_conf_path in glob.glob(os.path.join(_job_dir, '**',
                                                    'runtime_conf.json'),
                                       recursive=True):
        runtime_conf = file_utils.load_json_conf(
            os.path.abspath(runtime_conf_path))
        for _role, _party_ids in runtime_conf['role'].items():
            all_party.extend([(_role, _party_id) for _party_id in _party_ids])
    all_party = set(all_party)
    logger.info('start sending stop job to {}'.format(','.join(
        [i[0] for i in all_party])))
    _method = 'DELETE'
    for _role, _party_id in all_party:
        federated_api(job_id=job_id,
                      method=_method,
                      url='/workflow/{}/{}/{}'.format(job_id, _role,
                                                      _party_id),
                      party_id=_party_id)
    return get_json_result(job_id=job_id)
Example #19
def load_model():
    config = file_utils.load_json_conf(request.json.get("config_path"))
    _job_id = generate_job_id()
    channel, stub = get_proxy_data_channel()
    for _party_id in config.get("party_ids"):
        config['my_party_id'] = _party_id
        _method = 'POST'
        _url = '/model/load/do'
        _packet = wrap_grpc_packet(config, _method, _url, _party_id, _job_id)
        logger.info(
            'Starting load model job_id:{} party_id:{} method:{} url:{}'.format(_job_id, _party_id, _method, _url))
        try:
            _return = stub.unaryCall(_packet)
            logger.info("Grpc unary response: {}".format(_return))
        except grpc.RpcError as e:
            msg = 'job_id:{} party_id:{} method:{} url:{} Failed to start load model'.format(_job_id,
                                                                                             _party_id,
                                                                                             _method,
                                                                                             _url)
            logger.exception(msg)
            return get_json_result(-101, 'UnaryCall submit to remote manager failed')
    return get_json_result()
Example #20
def init_federation(session_id, work_mode, runtime_conf,
                    server_conf_path) -> Federation:
    if work_mode.is_standalone():
        from .standalone import FederationRuntime
        return FederationRuntime(session_id, runtime_conf)
    elif work_mode.is_cluster():
        from .cluster import FederationRuntime
        server_conf = file_utils.load_json_conf(server_conf_path)
        if CONF_KEY_SERVER not in server_conf:
            raise EnvironmentError(
                "server_conf should contain key {}".format(CONF_KEY_SERVER))
        if CONF_KEY_FEDERATION not in server_conf.get(CONF_KEY_SERVER):
            raise EnvironmentError(
                "The {} should be a json file containing key: {}".format(
                    server_conf_path, CONF_KEY_FEDERATION))
        host = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get(
            "host")
        port = server_conf.get(CONF_KEY_SERVER).get(CONF_KEY_FEDERATION).get(
            "port")
        return FederationRuntime(session_id, runtime_conf, host, port)
    else:
        raise EnvironmentError(f"{work_mode} unknown")
Example #21
    'passwd': 'fate_dev',
    'host': 'mysql',
    'port': 3306,
    'max_connections': 100,
    'stale_timeout': 30,
}

REDIS = {
    'host': 'redis',
    'port': 6379,
    'password': '******',
    'max_connections': 500
}

REDIS_QUEUE_DB_INDEX = 0
JOB_MODULE_CONF = file_utils.load_json_conf("fate_flow/job_module_conf.json")

"""
Services
"""
server_conf = file_utils.load_json_conf("arch/conf/server_conf.json")
PROXY_HOST = server_conf.get(SERVERS).get('proxy').get('host')
PROXY_PORT = server_conf.get(SERVERS).get('proxy').get('port')
BOARD_HOST = server_conf.get(SERVERS).get('fateboard').get('host')
if BOARD_HOST == 'localhost':
    BOARD_HOST = get_lan_ip()
BOARD_PORT = server_conf.get(SERVERS).get('fateboard').get('port')
SERVINGS = server_conf.get(SERVERS).get('servings')
BOARD_DASHBOARD_URL = 'http://%s:%d/index.html#/dashboard?job_id={}&role={}&party_id={}' % (BOARD_HOST, BOARD_PORT)
RuntimeConfig.init_config(WORK_MODE=WORK_MODE)
RuntimeConfig.init_config(HTTP_PORT=HTTP_PORT)
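The settings modules above assume arch/conf/server_conf.json exposes a top-level 'servers' section. A hypothetical layout inferred from the lookups in these examples is sketched below; all hosts and ports are placeholders.

import json

# Hypothetical server_conf.json layout; key names come from the lookups in the
# examples on this page, the values are placeholders.
server_conf_example = {
    "servers": {
        "proxy": {"host": "127.0.0.1", "port": 9370},
        "roll": {"host": "127.0.0.1", "port": 8011},
        "federation": {"host": "127.0.0.1", "port": 9394},
        "fateboard": {"host": "localhost", "port": 8080},
        "servings": ["127.0.0.1:8000"]
    }
}
print(json.dumps(server_conf_example, indent=4))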
Example #22
    def run_task():
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
            parser.add_argument('-n', '--component_name', required=True, type=str,
                                help="component name")
            parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
            parser.add_argument('-r', '--role', required=True, type=str, help="role")
            parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
            parser.add_argument('-c', '--config', required=True, type=str, help="task config")
            parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger(args.job_id).info('enter task process')
            schedule_logger(args.job_id).info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
                RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            party_id = int(args.party_id)
            executor_pid = os.getpid()
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
            task_parameters = task_config['task_parameters']
            module_name = task_config['module_name']
            TaskExecutor.monkey_patch()
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True, force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               component_module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = executor_pid
            run_class_paths = component_parameters.get('CodePath').split('/')
            run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
            run_class_name = run_class_paths[-1]
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None),
                                          task_info=task.to_json())

            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                      BACKEND=job_parameters.get('backend', 0))
            if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
                session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
            else:
                session_options = {}
            session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                         mode=RuntimeConfig.WORK_MODE,
                         backend=RuntimeConfig.BACKEND,
                         options=session_options)
            federation.init(job_id=task_id, runtime_conf=component_parameters)

            schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
            schedule_logger().info(component_parameters)
            schedule_logger().info(task_input_dsl)
            task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                           task_id=task_id,
                                                           job_args=job_args,
                                                           job_parameters=job_parameters,
                                                           task_parameters=task_parameters,
                                                           input_dsl=task_input_dsl,
                                                           if_save_as_task_input_data=job_parameters.get("save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH)
                                                           )
            run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            run_object.run(component_parameters, task_run_args)
            output_data = run_object.save_data()
            tracker.save_output_data_table(output_data, task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
            output_model = run_object.export_model()
            # There is only one model output at the current dsl version.
            tracker.save_output_model(output_model, task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
            task.f_status = TaskStatus.COMPLETE
        except Exception as e:
            task.f_status = TaskStatus.FAILED
            schedule_logger().exception(e)
        finally:
            sync_success = False
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                              party_id=party_id,
                                              initiator_party_id=job_initiator.get('party_id', None),
                                              initiator_role=job_initiator.get('role', None),
                                              task_info=task.to_json())
                sync_success = True
            except Exception as e:
                traceback.print_exc()
                schedule_logger().exception(e)
        schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_end_time)))
        schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id, int(task.f_elapsed)/1000))
        schedule_logger().info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))

        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))
Example #23
audit_logger = log_utils.audit_logger()
"""
Services 
"""
IP = get_base_config("fate_flow", {}).get("host", "0.0.0.0")
HTTP_PORT = get_base_config("fate_flow", {}).get("http_port")
GRPC_PORT = get_base_config("fate_flow", {}).get("grpc_port")

# standalone jobs are sent to the standalone job server when FATE-Flow runs in cluster deploy mode;
# this is not the port used by FATE-Flow in standalone deploy mode.
CLUSTER_STANDALONE_JOB_SERVER_PORT = 9381

# services ip and port
SERVER_CONF_PATH = 'arch/conf/server_conf.json'
SERVING_PATH = '/servers/servings'
server_conf = file_utils.load_json_conf(SERVER_CONF_PATH)
PROXY_HOST = server_conf.get(SERVERS).get('proxy').get('host')
PROXY_PORT = server_conf.get(SERVERS).get('proxy').get('port')
BOARD_HOST = server_conf.get(SERVERS).get('fateboard').get('host')
if BOARD_HOST == 'localhost':
    BOARD_HOST = get_lan_ip()
BOARD_PORT = server_conf.get(SERVERS).get('fateboard').get('port')
MANAGER_HOST = server_conf.get(SERVERS).get('fatemanager', {}).get('host')
MANAGER_PORT = server_conf.get(SERVERS).get('fatemanager', {}).get('port')
SERVINGS = CenterConfig.get_settings(path=SERVING_PATH,
                                     servings_zk_path=SERVINGS_ZK_PATH,
                                     use_zk=USE_CONFIGURATION_CENTER,
                                     hosts=ZOOKEEPER_HOSTS,
                                     server_conf_path=SERVER_CONF_PATH)
BOARD_DASHBOARD_URL = 'http://%s:%d/index.html#/dashboard?job_id={}&role={}&party_id={}' % (
    BOARD_HOST, BOARD_PORT)
Example #24
 def __init__(self, fed_obj):
     super().__init__(fed_obj.job_id, fed_obj.party_id, fed_obj.role, fed_obj.runtime_conf)
     self.trans_conf = file_utils.load_json_conf('contrib/fate_script/conf/FateScriptTransferVar.json')
     self.encrypt_operator = None
Example #25
 def init_conf(self, role):
     conf_path = file_utils.load_json_conf('contrib/fate_script/conf/' + str(role) + '_runtime_conf.json')
     self.iter_num = conf_path.get("FATEScriptLRParam").get("iter_num")
     self.batch_num = conf_path.get("FATEScriptLRParam").get("batch_num")
     self.learning_rate = conf_path.get("FATEScriptLRParam").get("learning_rate")
     self.eps = conf_path.get("FATEScriptLRParam").get("eps")
Example #26
 def get_settings_from_file(path, server_conf_path):
     server_conf = file_utils.load_json_conf(server_conf_path)
     data = server_conf
     for k in path.split('/')[1:]:
         data = data.get(k, None)
     return data
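Usage sketch for the path-style lookup above: a path such as '/servers/servings' is split on '/' and walks the nested config one key per segment. The config below is a placeholder standing in for a loaded server_conf.

server_conf = {"servers": {"servings": ["127.0.0.1:8000"]}}

data = server_conf
for k in "/servers/servings".split('/')[1:]:
    data = data.get(k, None)

print(data)  # ['127.0.0.1:8000']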
Example #27
def get_job_conf(job_id):
    conf_dict = {}
    for key, path in get_job_conf_path(job_id).items():
        config = file_utils.load_json_conf(path)
        conf_dict[key] = config
    return conf_dict
Example #28
    def run_task():
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j',
                                '--job_id',
                                required=True,
                                type=str,
                                help="job id")
            parser.add_argument('-n',
                                '--component_name',
                                required=True,
                                type=str,
                                help="component name")
            parser.add_argument('-t',
                                '--task_id',
                                required=True,
                                type=str,
                                help="task id")
            parser.add_argument('-r',
                                '--role',
                                required=True,
                                type=str,
                                help="role")
            parser.add_argument('-p',
                                '--party_id',
                                required=True,
                                type=str,
                                help="party id")
            parser.add_argument('-c',
                                '--config',
                                required=True,
                                type=str,
                                help="task config")
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger.info('enter task process')
            schedule_logger.info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(
                    HTTP_PORT=args.job_server.split(':')[1])
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            party_id = int(args.party_id)
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            parameters = task_config['parameters']
            module_name = task_config['module_name']
        except Exception as e:
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'])
            storage.init_storage(job_id=task_id,
                                 work_mode=RuntimeConfig.WORK_MODE)
            federation.init(job_id=task_id, runtime_conf=parameters)
            job_log_dir = os.path.join(
                job_utils.get_job_log_directory(job_id=job_id), role,
                str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir,
                                                  parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True,
                                                  force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id,
                               role=role,
                               party_id=party_id,
                               component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = os.getpid()
            run_class_paths = parameters.get('CodePath').split('/')
            run_class_package = '.'.join(
                run_class_paths[:-2]) + '.' + run_class_paths[-2].replace(
                    '.py', '')
            run_class_name = run_class_paths[-1]
            task_run_args = TaskExecutor.get_task_run_args(
                job_id=job_id,
                role=role,
                party_id=party_id,
                job_parameters=job_parameters,
                job_args=job_args,
                input_dsl=task_input_dsl)
            run_object = getattr(importlib.import_module(run_class_package),
                                 run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id,
                                          component_name=component_name,
                                          task_id=task_id,
                                          role=role,
                                          party_id=party_id,
                                          initiator_party_id=job_initiator.get(
                                              'party_id', None),
                                          task_info=task.to_json())

            schedule_logger.info('run {} {} {} {} {} task'.format(
                job_id, component_name, task_id, role, party_id))
            schedule_logger.info(parameters)
            schedule_logger.info(task_input_dsl)
            run_object.run(parameters, task_run_args)
            if task_output_dsl:
                if task_output_dsl.get('data', []):
                    output_data = run_object.save_data()
                    tracker.save_output_data_table(
                        output_data,
                        task_output_dsl.get('data')[0])
                if task_output_dsl.get('model', []):
                    output_model = run_object.export_model()
                    # There is only one model output at the current dsl version.
                    tracker.save_output_model(output_model,
                                              task_output_dsl['model'][0])
            task.f_status = TaskStatus.SUCCESS
        except Exception as e:
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
        finally:
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(
                    job_id=job_id,
                    component_name=component_name,
                    task_id=task_id,
                    role=role,
                    party_id=party_id,
                    initiator_party_id=job_initiator.get('party_id', None),
                    task_info=task.to_json())
            except Exception as e:
                schedule_logger.exception(e)
        schedule_logger.info('finish {} {} {} {} {} {} task'.format(
            job_id, component_name, task_id, role, party_id, task.f_status))
        print('finish {} {} {} {} {} {} task'.format(job_id, component_name,
                                                     task_id, role, party_id,
                                                     task.f_status))
Example #29
import json
import os
import tarfile
import traceback
from contextlib import closing
import time
import re

import requests
from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor

from arch.api.utils import file_utils
from arch.api.utils.core_utils import get_lan_ip
from fate_flow.settings import SERVERS, ROLE, API_VERSION
from fate_flow.utils import detect_utils

server_conf = file_utils.load_json_conf("arch/conf/server_conf.json")
JOB_OPERATE_FUNC = ["submit_job", "stop_job", "query_job", "data_view_query", "clean_job", "clean_queue"]
JOB_FUNC = ["job_config", "job_log"]
TASK_OPERATE_FUNC = ["query_task"]
TRACKING_FUNC = ["component_parameters", "component_metric_all", "component_metric_delete", "component_metrics",
                 "component_output_model", "component_output_data", "component_output_data_table"]
DATA_FUNC = ["download", "upload", "upload_history"]
TABLE_FUNC = ["table_info", "table_delete"]
MODEL_FUNC = ["load", "bind", "store", "restore", "export", "import"]
PERMISSION_FUNC = ["grant_privilege", "delete_privilege", "query_privilege"]


def prettify(response, verbose=True):
    if verbose:
        print(json.dumps(response, indent=4, ensure_ascii=False))
        print()
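Usage sketch: prettify simply pretty-prints a response dict, for example a hypothetical API reply.

prettify({"retcode": 0, "retmsg": "success"})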
Example #30
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
DEFAULT_GRPC_OVERALL_TIMEOUT = 60 * 1000  # ms
HEADERS = {
    'Content-Type': 'application/json',
}


IP = '0.0.0.0'
GRPC_PORT = 9360
HTTP_PORT = 9380
PARTY_ID = 9999
WORK_MODE = 0
LOCAL_URL = "http://localhost:{}".format(HTTP_PORT)

DATABASE = {
    'engine': 'mysql',
    'name': 'task_manager',
    'user': '******',
    'passwd': 'root1234',
    'host': '127.0.0.1',
    'port': 3306,
    'max_connections': 500,
    'stale_timeout': 30,
}

server_conf = file_utils.load_json_conf("arch/conf/server_conf.json")
PROXY_HOST = server_conf.get(SERVERS).get('proxy').get('host')
PROXY_PORT = server_conf.get(SERVERS).get('proxy').get('port')
SERVINGS = server_conf.get(SERVERS).get('servings')
JOB_MODULE_CONF = file_utils.load_json_conf("arch/task_manager/job_module_conf.json")