Example #1
    @classmethod
    def adapt_engine_parameters(cls, role, job_parameters: RunParameters, create_initiator_baseline=False):
        computing_engine_info = ResourceManager.get_engine_registration_info(
            engine_type=EngineType.COMPUTING, engine_name=job_parameters.computing_engine)
        if create_initiator_baseline:
            job_parameters.adaptation_parameters = {
                "task_nodes": 0,
                "task_cores_per_node": 0,
                "task_memory_per_node": 0,
            }
            task_cores = 0
        else:
            # use the initiator baseline
            if role == "arbiter":
                task_cores = 1
            else:
                task_cores = (job_parameters.adaptation_parameters["task_nodes"]
                              * job_parameters.adaptation_parameters["task_cores_per_node"])

        if job_parameters.computing_engine in {ComputingEngine.STANDALONE, ComputingEngine.EGGROLL}:
            job_parameters.adaptation_parameters["task_nodes"] = computing_engine_info.f_nodes
            job_parameters.adaptation_parameters["task_cores_per_node"] = int(
                job_parameters.eggroll_run.get(
                    "eggroll.session.processors.per.node",
                    cls.adapt_task_cores_per_node(create_initiator_baseline, task_cores,
                                                  job_parameters.adaptation_parameters["task_nodes"])))
            if not create_initiator_baseline:
                # write the adapted value back as the actual engine parameter
                job_parameters.eggroll_run["eggroll.session.processors.per.node"] = \
                    job_parameters.adaptation_parameters["task_cores_per_node"]
        elif job_parameters.computing_engine == ComputingEngine.SPARK:
            job_parameters.adaptation_parameters["task_nodes"] = int(
                job_parameters.spark_run.get("num-executors", computing_engine_info.f_nodes))
            job_parameters.adaptation_parameters["task_cores_per_node"] = int(
                job_parameters.spark_run.get(
                    "executor-cores",
                    cls.adapt_task_cores_per_node(create_initiator_baseline, task_cores,
                                                  job_parameters.adaptation_parameters["task_nodes"])))
            if not create_initiator_baseline:
                # write the adapted values back as the actual engine parameters
                job_parameters.spark_run["num-executors"] = job_parameters.adaptation_parameters["task_nodes"]
                job_parameters.spark_run["executor-cores"] = job_parameters.adaptation_parameters["task_cores_per_node"]
Example #2
    @classmethod
    def adapt_engine_parameters(cls, role, job_parameters: RunParameters, create_initiator_baseline=False):
        computing_engine_info = ResourceManager.get_engine_registration_info(
            engine_type=EngineType.COMPUTING, engine_name=job_parameters.computing_engine)
        if create_initiator_baseline:
            job_parameters.adaptation_parameters = {
                "task_nodes": 0,
                "task_cores_per_node": 0,
                "task_memory_per_node": 0,
                # request_task_cores is based on the initiator and distributed to all parties, taken from the job conf or the initiator's FATE Flow server default settings
                "request_task_cores": int(job_parameters.task_cores) if job_parameters.task_cores else DEFAULT_TASK_CORES,
                "if_initiator_baseline": True
            }
        else:
            # use initiator baseline
            if role == "arbiter":
                job_parameters.adaptation_parameters["request_task_cores"] = 1
            elif "request_task_cores" not in job_parameters.adaptation_parameters:
                # backward compatibility with 1.5.0
                job_parameters.adaptation_parameters["request_task_cores"] = job_parameters.adaptation_parameters["task_nodes"] * job_parameters.adaptation_parameters["task_cores_per_node"]

            job_parameters.adaptation_parameters["if_initiator_baseline"] = False
        adaptation_parameters = job_parameters.adaptation_parameters

        if job_parameters.computing_engine in {ComputingEngine.STANDALONE, ComputingEngine.EGGROLL}:
            adaptation_parameters["task_nodes"] = computing_engine_info.f_nodes
            if int(job_parameters.eggroll_run.get("eggroll.session.processors.per.node", 0)) > 0:
                adaptation_parameters["task_cores_per_node"] = int(job_parameters.eggroll_run["eggroll.session.processors.per.node"])
            else:
                adaptation_parameters["task_cores_per_node"] = max(1, int(adaptation_parameters["request_task_cores"] / adaptation_parameters["task_nodes"]))
            if not create_initiator_baseline:
                # set the adaptation parameters to the actual engine operation parameters
                job_parameters.eggroll_run["eggroll.session.processors.per.node"] = adaptation_parameters["task_cores_per_node"]
        elif job_parameters.computing_engine in {ComputingEngine.SPARK, ComputingEngine.LINKIS_SPARK}:
            adaptation_parameters["task_nodes"] = int(job_parameters.spark_run.get("num-executors", computing_engine_info.f_nodes))
            if int(job_parameters.spark_run.get("executor-cores", 0)) > 0:
                adaptation_parameters["task_cores_per_node"] = int(job_parameters.spark_run["executor-cores"])
            else:
                adaptation_parameters["task_cores_per_node"] = max(1, int(adaptation_parameters["request_task_cores"] / adaptation_parameters["task_nodes"]))
            if not create_initiator_baseline:
                # set the adaptation parameters to the actual engine operation parameters
                job_parameters.spark_run["num-executors"] = adaptation_parameters["task_nodes"]
                job_parameters.spark_run["executor-cores"] = adaptation_parameters["task_cores_per_node"]
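Both engine branches above size the nodes first and then derive cores per node as an even split of request_task_cores, floored but never below one. A standalone sketch of that arithmetic (the function name is illustrative, not part of the FATE API):

    def cores_per_node(request_task_cores: int, task_nodes: int) -> int:
        # Divide the requested total evenly across nodes; int() truncates just
        # like the code above, and max() keeps at least one core per node.
        return max(1, int(request_task_cores / task_nodes))

    assert cores_per_node(20, 4) == 5   # 20 requested cores over 4 executors
    assert cores_per_node(3, 4) == 1    # 0 after truncation, clamped up to 1
    assert cores_per_node(7, 2) == 3    # 3.5 truncates to 3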
Example #3
    @classmethod
    def job_engine_support_parameters(cls, job_parameters: RunParameters):
        computing_engine_info = ResourceManager.get_engine_registration_info(
            engine_type=EngineType.COMPUTING, engine_name=job_parameters.computing_engine)
        job_parameters.adaptation_parameters = {
            "task_nodes": 0,
            "task_cores_per_node": 0,
            "task_memory_per_node": 0,
        }
        if job_parameters.computing_engine in {ComputingEngine.STANDALONE, ComputingEngine.EGGROLL}:
            job_parameters.adaptation_parameters["task_nodes"] = computing_engine_info.f_nodes
            job_parameters.adaptation_parameters["task_cores_per_node"] = int(
                job_parameters.eggroll_run.get("eggroll.session.processors.per.node", DEFAULT_TASK_CORES_PER_NODE))
            job_parameters.eggroll_run["eggroll.session.processors.per.node"] = \
                job_parameters.adaptation_parameters["task_cores_per_node"]
        elif job_parameters.computing_engine == ComputingEngine.SPARK:
            job_parameters.adaptation_parameters["task_nodes"] = int(
                job_parameters.spark_run.get("num-executors", computing_engine_info.f_nodes))
            job_parameters.spark_run["num-executors"] = job_parameters.adaptation_parameters["task_nodes"]

            job_parameters.adaptation_parameters["task_cores_per_node"] = int(
                job_parameters.spark_run.get("executor-cores", DEFAULT_TASK_CORES_PER_NODE))
            job_parameters.spark_run["executor-cores"] = job_parameters.adaptation_parameters["task_cores_per_node"]
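A usage sketch for this third variant, with a stubbed RunParameters and a placeholder controller class; both names and the stub's shape are hypothetical, and the real FATE objects carry many more fields:

    class StubRunParameters:
        def __init__(self, computing_engine, spark_run=None, eggroll_run=None):
            self.computing_engine = computing_engine
            self.spark_run = spark_run or {}
            self.eggroll_run = eggroll_run or {}
            self.adaptation_parameters = {}

    params = StubRunParameters(ComputingEngine.SPARK, spark_run={"executor-cores": 2})
    JobController.job_engine_support_parameters(params)

    # "num-executors" falls back to the registered node count (f_nodes), while
    # the user-supplied "executor-cores" of 2 is kept and written back, e.g.:
    # params.spark_run == {"executor-cores": 2, "num-executors": <f_nodes>}
    print(params.spark_run)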