Example No. 1
    def get_env(self):
        env = super().get_env()
        if self.get_executor() == "spark":
            env["EXECUTOR_CLUSTER"] = self.get_spark_paasta_cluster()
            env["EXECUTOR_POOL"] = self.get_spark_paasta_pool()
            # Run spark (and mesos framework) as root.
            env["SPARK_USER"] = "******"
            env["SPARK_OPTS"] = stringify_spark_env(
                self.get_spark_config_dict())
            env.update(get_mesos_spark_auth_env())
            env["CLUSTERMAN_RESOURCES"] = json.dumps(
                dict(
                    get_spark_resource_requirements(
                        spark_config_dict=self.get_spark_config_dict(),
                        webui_url=get_webui_url(self.spark_ui_port),
                    ).values()))
            if "AWS_ACCESS_KEY_ID" not in env or "AWS_SECRET_ACCESS_KEY" not in env:
                try:
                    access_key, secret_key = get_aws_credentials(
                        service=self.get_service(),
                        aws_credentials_yaml=self.config_dict.get(
                            "aws_credentials_yaml"),
                    )
                    env["AWS_ACCESS_KEY_ID"] = access_key
                    env["AWS_SECRET_ACCESS_KEY"] = secret_key
                except Exception:
                    log.warning(
                        f"Cannot set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment "
                        f"variables for tron action {self.get_instance()} of service "
                        f"{self.get_service()} via credentail file. Traceback:\n"
                        f"{traceback.format_exc()}")
            if "AWS_DEFAULT_REGION" not in env:
                env["AWS_DEFAULT_REGION"] = DEFAULT_AWS_REGION

        return env
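The `dict(...)` wrapped around `get_spark_resource_requirements(...).values()` above only makes sense if that helper returns a mapping whose values are themselves (metric key, amount) pairs. A minimal illustration under that assumption (the `requirements` shape below is hypothetical):

import json

# Hypothetical return shape: resource name -> (clusterman metric key, requested amount).
requirements = {
    "cpus": ("requested_cpus|framework_name=example-app", 4),
    "mem": ("requested_mem|framework_name=example-app", 8192),
}

# dict(requirements.values()) keeps only the (metric key, amount) pairs,
# which is what ends up serialized into CLUSTERMAN_RESOURCES.
print(json.dumps(dict(requirements.values())))
# {"requested_cpus|framework_name=example-app": 4, "requested_mem|framework_name=example-app": 8192}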
Example No. 2
    def get_env(self):
        env = super().get_env()
        spark_env = {}
        if self.get_executor() == "spark":
            spark_env = get_mesos_spark_env(
                spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
                spark_ui_port=pick_random_port(
                    f"{self.get_service()}{self.get_instance()}".encode()),
                mesos_leader=find_mesos_leader(self.get_cluster()),
                mesos_secret=load_mesos_secret_for_spark(),
                paasta_cluster=self.get_cluster(),
                paasta_pool=self.get_pool(),
                paasta_service=self.get_service(),
                paasta_instance=self.get_instance(),
                docker_img=self.get_docker_url(),
                volumes=format_volumes(
                    self.get_volumes(
                        load_system_paasta_config().get_volumes())),
                user_spark_opts=self.config_dict.get("spark_args"),
                event_log_dir=get_default_event_log_dir(
                    service=self.get_service(),
                    aws_credentials_yaml=self.config_dict.get(
                        "aws_credentials"),
                ),
            )
            env["SPARK_OPTS"] = stringify_spark_env(spark_env)

        return env
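Example No. 2 seeds `pick_random_port` with the encoded service and instance names, which suggests the Spark UI port is derived deterministically from that byte string. A minimal sketch under that assumption (`pick_stable_port` and the port range are illustrative, not the real helper):

import hashlib

def pick_stable_port(seed: bytes, base: int = 33000, span: int = 25000) -> int:
    # Hash the seed and map it into a fixed range so the same service/instance
    # pair tends to get the same Spark UI port across runs.
    digest = int(hashlib.sha1(seed).hexdigest(), 16)
    return base + digest % span

print(pick_stable_port(b"my_servicemy_instance"))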
Example No. 3
    def get_cmd(self):
        command = self.config_dict.get("command")
        if self.get_executor() == "spark":
            # Spark expects to be able to write to MESOS_SANDBOX if it is set
            # but the default value (/mnt/mesos/sandbox) doesn't get mounted in
            # our Docker containers, so we unset it here.  (Un-setting is fine,
            # since Spark will just write to /tmp instead).
            command = "unset MESOS_DIRECTORY MESOS_SANDBOX; " + inject_spark_conf_str(
                command, stringify_spark_env(self.get_spark_config_dict()))
        return command
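`inject_spark_conf_str` presumably splices the stringified `--conf` flags into the action's spark-submit invocation. An illustrative sketch under that assumption (the real helper may detect the launcher differently):

def inject_spark_conf_str_sketch(original_cmd: str, spark_conf_str: str) -> str:
    # Insert the --conf flags right after the launcher token, assuming the
    # command is a spark-submit style invocation.
    for token in ("spark-submit", "spark-shell", "pyspark"):
        if token in original_cmd:
            return original_cmd.replace(token, f"{token} {spark_conf_str}", 1)
    return original_cmd

print(inject_spark_conf_str_sketch(
    "spark-submit job.py --date 2020-01-01",
    "--conf spark.ui.port=33000",
))
# spark-submit --conf spark.ui.port=33000 job.py --date 2020-01-01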
Example No. 4
    def get_env(self):
        env = super().get_env()
        if self.get_executor() == "spark":
            spark_config_dict = self.get_spark_config_dict()
            env["EXECUTOR_CLUSTER"] = self.get_spark_paasta_cluster()
            env["EXECUTOR_POOL"] = self.get_spark_paasta_pool()
            env["SPARK_OPTS"] = stringify_spark_env(spark_config_dict)
            # The actual mesos secret will be decrypted and injected on mesos master when assigning
            # tasks.
            env["SPARK_MESOS_SECRET"] = "SHARED_SECRET(SPARK_MESOS_SECRET)"
            if clusterman_metrics:
                env["CLUSTERMAN_RESOURCES"] = json.dumps(
                    generate_clusterman_metrics_entries(
                        clusterman_metrics,
                        get_resources_requested(spark_config_dict),
                        spark_config_dict["spark.app.name"],
                        get_webui_url(spark_config_dict["spark.ui.port"]),
                    ))
            else:
                env["CLUSTERMAN_RESOURCES"] = "{}"

            if "AWS_ACCESS_KEY_ID" not in env or "AWS_SECRET_ACCESS_KEY" not in env:
                try:
                    access_key, secret_key, session_token = get_aws_credentials(
                        service=self.get_service(),
                        aws_credentials_yaml=self.config_dict.get(
                            "aws_credentials_yaml"),
                    )
                    env["AWS_ACCESS_KEY_ID"] = access_key
                    env["AWS_SECRET_ACCESS_KEY"] = secret_key
                except Exception:
                    log.warning(
                        f"Cannot set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment "
                        f"variables for tron action {self.get_instance()} of service "
                        f"{self.get_service()} via credentail file. Traceback:\n"
                        f"{traceback.format_exc()}")
            if "AWS_DEFAULT_REGION" not in env:
                env["AWS_DEFAULT_REGION"] = DEFAULT_AWS_REGION

        return env
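The `if clusterman_metrics:` branch above implies that `clusterman_metrics` is imported as an optional dependency elsewhere in the module and left as None when unavailable. A minimal sketch of that import guard, assuming it sits near the top of the file:

try:
    import clusterman_metrics  # optional dependency; may not be installed everywhere
except ImportError:
    clusterman_metrics = None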
Example No. 5
def test_stringify_spark_env():
    conf = {'spark.mesos.leader': '1234', 'spark.mesos.principal': 'spark'}
    assert spark_config.stringify_spark_env(conf) == (
        '--conf spark.mesos.leader=1234 --conf spark.mesos.principal=spark')
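Given that assertion, a minimal implementation consistent with the test (a sketch, not necessarily the library's actual code) is:

def stringify_spark_env_sketch(spark_env: dict) -> str:
    # Render each entry as a `--conf key=value` pair, space-separated, in
    # insertion order, matching the expected string in the test above.
    return " ".join(f"--conf {key}={value}" for key, value in spark_env.items())

assert stringify_spark_env_sketch(
    {"spark.mesos.leader": "1234", "spark.mesos.principal": "spark"}
) == "--conf spark.mesos.leader=1234 --conf spark.mesos.principal=spark"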