def get_env(cls, job_id, provider_info):
    provider = ComponentProvider(**provider_info)
    env = provider.env.copy()
    env["PYTHONPATH"] = os.path.dirname(provider.path)
    if job_id:
        env["FATE_JOB_ID"] = job_id
    return env
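# Usage sketch (illustrative, not part of the source): get_env builds the environment for a
# spawned worker process. The enclosing class is assumed here to be WorkerManager, and the
# job_id / provider_info values below are made-up examples.
#
#   env = WorkerManager.get_env(
#       job_id="202203150102030405060",
#       provider_info={"name": "fate", "version": "1.8.0",
#                      "path": "/data/projects/fate/python/federatedml",
#                      "class_path": ComponentRegistry.get_default_class_path()},
#   )
#   # env now carries the provider's environment plus PYTHONPATH and FATE_JOB_ID,
#   # ready to be passed to subprocess.Popen(cmd, env=env).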
def get_fate_flow_provider(cls):
    path = get_fate_flow_python_directory("fate_flow")
    provider = ComponentProvider(
        name="fate_flow",
        version=get_versions()["FATEFlow"],
        path=path,
        class_path=ComponentRegistry.get_default_class_path())
    return provider
def run(self, task: Task, run_parameters, run_parameters_path, config_dir, log_dir, cwd_dir, **kwargs):
    spark_home = ServiceRegistry.FATE_ON_SPARK.get("spark", {}).get("home")
    if not spark_home:
        # fall back to the pyspark package bundled in the python environment
        try:
            import pyspark
            spark_home = pyspark.__path__[0]
        except Exception as e:
            raise RuntimeError("cannot import pyspark") from e
    # else:
    #     raise ValueError(f"spark home must be configured in conf/service_conf.yaml when run on cluster mode")

    # additional configs
    spark_submit_config = run_parameters.spark_run
    deploy_mode = spark_submit_config.get("deploy-mode", "client")
    if deploy_mode not in ["client"]:
        raise ValueError(f"deploy mode {deploy_mode} not supported")

    spark_submit_cmd = os.path.join(spark_home, "bin/spark-submit")
    executable = [spark_submit_cmd, f"--name={task.f_task_id}#{task.f_role}"]
    # every key except "conf" is passed through as a --key=value option
    for k, v in spark_submit_config.items():
        if k != "conf":
            executable.append(f"--{k}={v}")
    # "conf" entries become repeated --conf key=value flags
    if "conf" in spark_submit_config:
        for ck, cv in spark_submit_config["conf"].items():
            executable.append("--conf")
            executable.append(f"{ck}={cv}")

    extra_env = {"SPARK_HOME": spark_home}
    if DEPENDENT_DISTRIBUTION:
        # distribute the python environment and FATE source code to the executors
        dependence = Dependence()
        dependence.init(provider=ComponentProvider(**task.f_provider_info))
        executor_env_pythonpath, executor_python_env, driver_python_env, archives = dependence.get_task_dependence_info()
        schedule_logger(task.f_job_id).info(f"executor_env_python {executor_python_env}, "
                                            f"driver_env_python {driver_python_env}, archives {archives}")
        executable.append("--archives")
        executable.append(archives)
        executable.append("--conf")
        executable.append(f"spark.pyspark.python={executor_python_env}")
        executable.append("--conf")
        executable.append(f"spark.executorEnv.PYTHONPATH={executor_env_pythonpath}")
        executable.append("--conf")
        executable.append(f"spark.pyspark.driver.python={driver_python_env}")

    return WorkerManager.start_task_worker(worker_name=WorkerName.TASK_EXECUTOR,
                                           task=task,
                                           task_parameters=run_parameters,
                                           executable=executable,
                                           extra_env=extra_env)
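# Illustrative runtime-conf fragment (an assumption, for context only): run_parameters.spark_run
# is the "spark_run" section of the job configuration. Everything except "conf" becomes a
# --key=value spark-submit option; "conf" entries become repeated --conf flags.
#
#   "spark_run": {
#       "deploy-mode": "client",      # only client mode is accepted by run() above
#       "num-executors": 2,
#       "executor-memory": "2g",
#       "conf": {
#           "spark.executor.cores": "1"
#       }
#   }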
def get_default_fate_provider(cls):
    path = JobDefaultConfig.default_component_provider_path.split("/")
    path = file_utils.get_fate_python_directory(*path)
    if not os.path.exists(path):
        raise Exception(f"default fate provider does not exist: {path}")
    provider = ComponentProvider(
        name="fate",
        version=get_versions()["FATE"],
        path=path,
        class_path=ComponentRegistry.get_default_class_path())
    return provider
def get_provider_object(cls, provider_info, check_registration=True):
    name, version = provider_info["name"], provider_info["version"]
    registration = ComponentRegistry.get_providers().get(name, {}).get(version, None)
    if check_registration and registration is None:
        raise Exception(f"{name} {version} provider is not registered")
    registration = registration or {}
    path = registration.get("path", [])
    class_path = registration.get("class_path", None)
    if class_path is None:
        class_path = ComponentRegistry.REGISTRY["default_settings"]["class_path"]
    return ComponentProvider(name=name, version=version, path=path, class_path=class_path)
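# Usage sketch (illustrative): resolving a registered provider back into a ComponentProvider.
# The enclosing class is assumed here to be ProviderManager; name/version are example values.
#
#   provider = ProviderManager.get_provider_object({"name": "fate", "version": "1.8.0"})
#   # raises if "fate" 1.8.0 was never registered; pass check_registration=False to skip the check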
def _run(self):
    result = {}
    dsl_parser = schedule_utils.get_job_dsl_parser(
        dsl=self.args.dsl,
        runtime_conf=self.args.runtime_conf,
        train_runtime_conf=self.args.train_runtime_conf,
        pipeline_dsl=self.args.pipeline_dsl)
    provider = ComponentProvider(**self.args.config["provider"])
    common_task_info = self.args.config["common_task_info"]
    log_msg = f"initialize the components: {self.args.config['components']}"
    LOGGER.info(start_log(log_msg, role=self.args.role, party_id=self.args.party_id))
    for component_name in self.args.config["components"]:
        result[component_name] = {}
        parameters, user_specified_parameters = ProviderManager.get_component_parameters(
            dsl_parser=dsl_parser,
            component_name=component_name,
            role=self.args.role,
            party_id=self.args.party_id,
            provider=provider)
        if parameters:
            task_info = dict(common_task_info)
            task_info["component_name"] = component_name
            task_info["component_module"] = parameters["module"]
            task_info["provider_info"] = provider.to_dict()
            task_info["component_parameters"] = parameters
            TaskController.create_task(
                role=self.args.role,
                party_id=self.args.party_id,
                run_on_this_party=common_task_info["run_on_this_party"],
                task_info=task_info)
            result[component_name]["need_run"] = True
        else:
            # no parameters resolved for this party, so it does not need to run the component
            result[component_name]["need_run"] = False
    LOGGER.info(successful_log(log_msg, role=self.args.role, party_id=self.args.party_id))
    return result
def instantiate_component_provider(provider_detail, alias=None, module=None, provider_name=None,
                                   provider_version=None, local_role=None, local_party_id=None,
                                   detect=True, provider_cache=None, job_parameters=None):
    if provider_name and provider_version:
        provider_path = provider_detail["providers"][provider_name][provider_version]["path"]
        provider = provider_utils.get_provider_interface(
            ComponentProvider(name=provider_name,
                              version=provider_version,
                              path=provider_path,
                              class_path=ComponentRegistry.get_default_class_path()))
        if provider_cache is not None:
            provider_cache.setdefault(provider_name, {})[provider_version] = provider
        return provider

    provider_name, provider_version = RuntimeConfParserUtil.get_component_provider(
        alias=alias,
        module=module,
        provider_detail=provider_detail,
        local_role=local_role,
        local_party_id=local_party_id,
        job_parameters=job_parameters,
        provider_cache=provider_cache,
        detect=detect)

    return RuntimeConfParserUtil.instantiate_component_provider(
        provider_detail,
        provider_name=provider_name,
        provider_version=provider_version)
def register():
    info = request.json or request.form.to_dict()
    if not Path(info["path"]).is_dir():
        return error_response(400, "invalid path")
    provider = ComponentProvider(
        name=info["name"],
        version=info["version"],
        path=info["path"],
        class_path=info.get("class_path", ComponentRegistry.get_default_class_path()))
    code, std = WorkerManager.start_general_worker(
        worker_name=WorkerName.PROVIDER_REGISTRAR, provider=provider)
    if code == 0:
        ComponentRegistry.load()
        if ComponentRegistry.get_providers().get(provider.name, {}).get(provider.version, None) is None:
            return get_json_result(retcode=RetCode.OPERATING_ERROR,
                                   retmsg="provider was not loaded into memory")
        return get_json_result()
    return get_json_result(retcode=RetCode.OPERATING_ERROR,
                           retmsg=f"register failed:\n{std}")
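# Illustrative request body (the exact URL prefix depends on how this view is mounted, so it is
# not shown here). The fields mirror what register() reads from request.json / request.form:
#
#   {
#       "name": "fate",
#       "version": "1.8.0",
#       "path": "/data/projects/fate/python/federatedml"
#   }
#
# "class_path" may also be supplied; otherwise ComponentRegistry.get_default_class_path() is used.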
def _run(self):
    provider = ComponentProvider(**self.args.config.get("provider"))
    dependence_type = self.args.dependence_type
    self.upload_dependencies_to_hadoop(provider=provider, dependence_type=dependence_type)
def check_upload(cls, job_id, provider_group, fate_flow_version_provider_info,
                 storage_engine=FateDependenceStorageEngine.HDFS.value):
    schedule_logger(job_id).info("start checking whether dependencies need to be uploaded")
    schedule_logger(job_id).info(f"{provider_group}")
    upload_details = {}
    check_tag = True
    upload_total = 0
    for version, provider_info in provider_group.items():
        upload_details[version] = {}
        provider = ComponentProvider(**provider_info)
        for dependence_type in [FateDependenceName.Fate_Source_Code.value, FateDependenceName.Python_Env.value]:
            schedule_logger(job_id).info(f"{dependence_type}")
            dependencies_storage_info = DependenceRegistry.get_dependencies_storage_meta(
                storage_engine=storage_engine,
                version=provider.version,
                type=dependence_type,
                get_or_one=True)
            need_upload = False
            if dependencies_storage_info:
                if dependencies_storage_info.f_upload_status:
                    # this version's dependence is still uploading; skip it and re-check later
                    check_tag = False
                    continue
                elif not dependencies_storage_info.f_storage_path:
                    need_upload = True
                    upload_total += 1
                elif dependence_type == FateDependenceName.Fate_Source_Code.value:
                    if provider.name == ComponentProviderName.FATE.value:
                        check_fate_flow_provider_status = False
                        if fate_flow_version_provider_info.values():
                            flow_provider = ComponentProvider(**list(fate_flow_version_provider_info.values())[0])
                            check_fate_flow_provider_status = DependenceRegistry.get_modify_time(flow_provider.path) \
                                                              != dependencies_storage_info.f_fate_flow_snapshot_time
                        if FATE_FLOW_UPDATE_CHECK and check_fate_flow_provider_status:
                            need_upload = True
                            upload_total += 1
                        elif DependenceRegistry.get_modify_time(provider.path) != \
                                dependencies_storage_info.f_snapshot_time:
                            need_upload = True
                            upload_total += 1
                    elif provider.name == ComponentProviderName.FATE_FLOW.value and FATE_FLOW_UPDATE_CHECK:
                        if DependenceRegistry.get_modify_time(provider.path) != \
                                dependencies_storage_info.f_fate_flow_snapshot_time:
                            need_upload = True
                            upload_total += 1
            else:
                need_upload = True
                upload_total += 1
            if need_upload:
                upload_details[version][dependence_type] = provider
    if upload_total > 0:
        check_tag = False
    schedule_logger(job_id).info(f"check dependencies result: {check_tag}, {upload_details}")
    return check_tag, upload_total > 0, upload_details
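# Usage sketch (illustrative; how the caller obtains provider_group and
# fate_flow_version_provider_info is not shown here):
#
#   check_tag, need_upload, upload_details = cls.check_upload(
#       job_id, provider_group, fate_flow_version_provider_info)
#
# check_tag is False while any dependence is still uploading or still has to be uploaded,
# need_upload is True when at least one (version, dependence_type) pair must be uploaded, and
# upload_details maps version -> dependence_type -> ComponentProvider for those pairs.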
def _run(self):
    provider = ComponentProvider(**self.args.config.get("provider"))
    support_components = ComponentRegistry.register_provider(provider)
    ComponentRegistry.register_components(provider, support_components)
    ComponentRegistry.dump()
    stat_logger.info(json_dumps(ComponentRegistry.REGISTRY, indent=4))