Example #1
0
    def get_env(self):
        """Build the environment dict for this tron action.

        Starts from the parent class environment; when the executor is
        "spark", additionally sets the executor cluster/pool, Spark
        options, Mesos auth variables, Clusterman resource requirements,
        and AWS credentials (from env, else from a credentials yaml),
        defaulting the AWS region when unset.
        """
        env = super().get_env()
        if self.get_executor() == "spark":
            # Computed once and reused below (SPARK_OPTS + Clusterman).
            spark_config_dict = self.get_spark_config_dict()
            env["EXECUTOR_CLUSTER"] = self.get_spark_paasta_cluster()
            env["EXECUTOR_POOL"] = self.get_spark_paasta_pool()
            # Run spark (and mesos framework) as root.
            env["SPARK_USER"] = "******"
            env["SPARK_OPTS"] = stringify_spark_env(spark_config_dict)
            env.update(get_mesos_spark_auth_env())
            env["CLUSTERMAN_RESOURCES"] = json.dumps(
                dict(
                    get_spark_resource_requirements(
                        spark_config_dict=spark_config_dict,
                        webui_url=get_webui_url(self.spark_ui_port),
                    ).values()
                )
            )
            if "AWS_ACCESS_KEY_ID" not in env or "AWS_SECRET_ACCESS_KEY" not in env:
                try:
                    access_key, secret_key = get_aws_credentials(
                        service=self.get_service(),
                        aws_credentials_yaml=self.config_dict.get(
                            "aws_credentials_yaml"),
                    )
                    env["AWS_ACCESS_KEY_ID"] = access_key
                    env["AWS_SECRET_ACCESS_KEY"] = secret_key
                except Exception:
                    # Best-effort: credentials may be injected some other
                    # way, so we only warn.  (Fixed message typo:
                    # "credentail" -> "credential".)
                    log.warning(
                        f"Cannot set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment "
                        f"variables for tron action {self.get_instance()} of service "
                        f"{self.get_service()} via credential file. Traceback:\n"
                        f"{traceback.format_exc()}")
            env.setdefault("AWS_DEFAULT_REGION", DEFAULT_AWS_REGION)

        return env
Example #2
0
def test_service_provided_no_yaml(
    mock_load_aws_credentials_from_yaml, mock_os,
):
    """With only a service name and the boto cfg present, credentials are
    loaded from the service's /etc/boto_cfg yaml file."""
    mock_os.return_value = True

    result = get_aws_credentials(service="service_name")

    expected_path = "/etc/boto_cfg/service_name.yaml"
    mock_load_aws_credentials_from_yaml.assert_called_once_with(expected_path)
    assert result == mock_load_aws_credentials_from_yaml.return_value
Example #3
0
def test_service_provided_fallback_to_default(mock_get_credentials, mock_os):
    """When the service's credentials yaml does not exist, falls back to
    the default boto credential chain."""
    mock_os.path.exists.return_value = False
    fake_session_creds = mock.MagicMock(access_key="id", secret_key="secret")
    mock_get_credentials.return_value = fake_session_creds
    fake_args = mock.Mock(
        no_aws_credentials=False, aws_credentials_yaml=None, service="service_name"
    )

    assert get_aws_credentials(fake_args) == ("id", "secret")
Example #4
0
def test_use_default_creds(mock_load_aws_credentials_from_yaml, mock_get_credentials):
    """For the default Spark service, credentials come from the default
    boto chain rather than a service yaml file."""
    fake_session_creds = mock.MagicMock(access_key="id", secret_key="secret")
    mock_get_credentials.return_value = fake_session_creds
    fake_args = mock.Mock(
        no_aws_credentials=False,
        aws_credentials_yaml=None,
        service=DEFAULT_SPARK_SERVICE,
    )

    assert get_aws_credentials(fake_args) == ("id", "secret")
Example #5
0
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
) -> int:
    """Assemble volumes, Spark configuration, and environment, then run
    the Spark client command inside a docker container.

    Args:
        args: parsed CLI arguments (service, cmd, credentials flags, etc.).
        docker_img: the docker image to run.
        instance_config: the paasta instance configuration.
        system_paasta_config: system-wide paasta configuration.

    Returns:
        The exit code of the docker container.

    Raises:
        Boto3Error: re-raised from Clusterman metrics emission unless
            --suppress-clusterman-metrics-errors was passed.
    """
    # Only bind host paths that actually exist; warn about the rest so the
    # user knows a configured mount was skipped.
    volumes = []
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        host_path = volume["hostPath"]
        if os.path.exists(host_path):
            volumes.append(
                f"{host_path}:{volume['containerPath']}:{volume['mode'].lower()}"
            )
        else:
            print(
                PaastaColors.yellow(
                    f"Warning: Path {host_path} does not exist on this host. Skipping this binding."
                ),
                file=sys.stderr,
            )

    original_docker_cmd = args.cmd or instance_config.get_cmd()
    # Port selection is seeded per service+pid so reruns are stable-ish
    # but concurrent runs on one host don't collide.
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = get_spark_app_name(original_docker_cmd, spark_ui_port)

    access_key, secret_key = get_aws_credentials(
        service=args.service,
        no_aws_credentials=args.no_aws_credentials,
        aws_credentials_yaml=args.aws_credentials_yaml,
        profile_name=args.aws_profile,
    )
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict, is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append(f"{args.work_dir}:rw")
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key, secret_key)
    )

    webui_url = get_webui_url(spark_ui_port)

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    # Surface the right URL depending on what is being launched.
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        print(f"\nSpark monitoring URL {webui_url}\n")

    if clusterman_metrics and _should_emit_resource_requirements(
        docker_cmd, args.mrjob
    ):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster, webui_url)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_app_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
Example #6
0
def test_yaml_provided(mock_load_aws_credentials_from_yaml):
    """An explicitly supplied aws_credentials_yaml path is loaded directly."""
    yaml_path = "credentials.yaml"

    result = get_aws_credentials(aws_credentials_yaml=yaml_path)

    mock_load_aws_credentials_from_yaml.assert_called_once_with(yaml_path)
    assert result == mock_load_aws_credentials_from_yaml.return_value
Example #7
0
def test_creds_disabled():
    """no_aws_credentials=True short-circuits to a (None, None) pair."""
    assert get_aws_credentials(no_aws_credentials=True) == (None, None)