Example #1
    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
        mock_create_spark_config_str,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
            autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
            autospec=True,
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )
            assert not mock_emit_resource_requirements.called
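Note: each of these tests receives its mock_* arguments from a @mock.patch decorator stack that this listing omits. The decorators apply bottom-up, so the decorator closest to the function supplies the first mock parameter. A minimal, self-contained sketch of that mechanism (the patched targets here are generic stand-ins, not the actual paasta_tools decorators):

from unittest import mock
import os

# The decorator closest to the function is applied first and therefore
# provides the first mock argument; the outermost one provides the last.
@mock.patch("os.getcwd", autospec=True)        # outermost -> second argument
@mock.patch("os.path.exists", autospec=True)   # innermost -> first argument
def demo(mock_exists, mock_getcwd):
    mock_exists.return_value = True
    mock_getcwd.return_value = "/fake"
    assert os.path.exists("anything") and os.getcwd() == "/fake"

demo()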
Example #2
    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')
        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements', autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics', autospec=True,
        ):
            mock_get_spark_config.return_value = {'spark.cores.max': 5, 'spark.master': 'mesos://spark.master'}
            args = mock.MagicMock(cmd='python mrjob_wrapper.py', mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs['docker_cmd'] == (
                'python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5'
            )

            assert mock_emit_resource_requirements.called
Example #3
    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ("id", "secret")
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.emit_resource_requirements",
            autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics",
            autospec=True,
        ):
            mock_get_spark_config.return_value = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == (
                "python mrjob_wrapper.py --spark-master=mesos://spark.master --jobconf spark.cores.max=5"
            )

            assert mock_emit_resource_requirements.called
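The `args, kwargs = mock_run_docker_container.call_args` unpacking used above works because `call_args` is a two-item `call` object holding the positional and keyword arguments of the most recent call. A quick illustration:

from unittest import mock

m = mock.Mock()
m("image", docker_cmd="pyspark")
args, kwargs = m.call_args          # call objects unpack to (args, kwargs)
assert args == ("image",)
assert kwargs["docker_cmd"] == "pyspark"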
Example #4
    def test_dont_emit_metrics_for_inappropriate_commands(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            args = mock.MagicMock(cmd="bash", mrjob=False)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf={"spark.ui.port": "1234", "spark.app.name": "fake_app"},
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )
            assert not mock_send_and_calculate_resources_cost.called
Example #5
    def test_configure_and_run_docker_container_mrjob(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": 5,
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake_app",
            }
            args = mock.MagicMock(cmd="python mrjob_wrapper.py", mrjob=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["docker_cmd"] == mock_get_docker_cmd.return_value

            assert mock_send_and_calculate_resources_cost.called
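Comparing `kwargs["docker_cmd"]` against `mock_get_docker_cmd.return_value` is reliable because a mock hands back the same `return_value` object on every call, so whatever the code under test received is the very object the assertion names:

from unittest import mock

m = mock.Mock()
first, second = m(), m()
assert first is second is m.return_value  # one shared return_value object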
Example #6
    def test_suppress_clusterman_metrics_errors(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            mock_send_and_calculate_resources_cost.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = "--conf spark.cores.max=5"
            spark_conf = {
                "spark.cores.max": 5,
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False, cmd="pyspark"
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img="fake-registry/fake-service",
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                    aws_creds=("id", "secret", "token"),
                    spark_conf=spark_conf,
                    cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                    pod_template_path="unique-run",
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )
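The failure branch above relies on `side_effect`: assigning an exception class makes the mock raise it when called, which `pytest.raises` then catches. A minimal sketch (with a stand-in exception, since `Boto3Error` comes from the test module's imports):

import pytest
from unittest import mock

class FakeBoto3Error(Exception):  # stand-in for the imported Boto3Error
    pass

failing = mock.Mock(side_effect=FakeBoto3Error)
with pytest.raises(FakeBoto3Error):
    failing()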
Example #7
    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = 'fake_user'
        mock_get_spark_config.return_value = {'spark.app.name': 'fake_app'}
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ('id', 'secret')

        args = mock.MagicMock()
        args.cluster = 'fake_cluster'
        args.cmd = 'pyspark'
        args.work_dir = '/fake_dir:/spark_driver'
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False

        retcode = configure_and_run_docker_container(
            args=args,
            docker_img='fake-registry/fake-service',
            instance_config=self.instance_config,
            system_paasta_config=self.system_paasta_config,
        )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name='paasta_spark_run_fake_user_123',
            volumes=[
                '/h1:/c1:ro',
                '/h2:/c2:ro',
                '/fake_dir:/spark_driver:rw',
                '/etc/passwd:/etc/passwd:ro',
                '/etc/group:/etc/group:ro',
            ],
            environment={
                'PAASTA_SERVICE': 'fake_service',
                'PAASTA_INSTANCE': 'fake_instance',
                'PAASTA_CLUSTER': 'fake_cluster',
                'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
                'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
                'PAASTA_LAUNCHED_BY': mock.ANY,
                'AWS_ACCESS_KEY_ID': 'id',
                'AWS_SECRET_ACCESS_KEY': 'secret',
                'SPARK_USER': '******',
                'SPARK_OPTS': '--conf spark.app.name=fake_app',
            },
            docker_img='fake-registry/fake-service',
            docker_cmd='pyspark --conf spark.app.name=fake_app',
            dry_run=True,
            nvidia=False,
        )
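`mock.ANY` in the expected environment compares equal to any value, which lets `assert_called_once_with` pin down every field except `PAASTA_LAUNCHED_BY`, whose value the test cannot predict:

from unittest import mock

m = mock.Mock()
m(environment={"PAASTA_LAUNCHED_BY": "whoever-ran-this"})
m.assert_called_once_with(environment={"PAASTA_LAUNCHED_BY": mock.ANY})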
Example #8
    def test_configure_and_run_docker_container(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {"spark.app.name": "fake_app", "spark.ui.port": "1234"}
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=[
                "/fake_dir:/spark_driver:rw",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
        )
Example #9
    def test_suppress_clusterman_metrics_errors(
        self,
        mock_time,
        mock_run_docker_container,
        mock_create_spark_config_str,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_get_aws_credentials.return_value = ('id', 'secret')

        with mock.patch(
            'paasta_tools.cli.cmds.spark_run.emit_resource_requirements',
            autospec=True,
        ) as mock_emit_resource_requirements, mock.patch(
            'paasta_tools.cli.cmds.spark_run.clusterman_metrics',
            autospec=True,
        ):
            mock_emit_resource_requirements.side_effect = Boto3Error
            mock_create_spark_config_str.return_value = '--conf spark.cores.max=5'

            args = mock.MagicMock(
                suppress_clusterman_metrics_errors=False,
                cmd='pyspark',
            )
            with pytest.raises(Boto3Error):
                configure_and_run_docker_container(
                    args=args,
                    docker_img='fake-registry/fake-service',
                    instance_config=self.instance_config,
                    system_paasta_config=self.system_paasta_config,
                )

            # make sure we don't blow up when this setting is True
            args.suppress_clusterman_metrics_errors = True
            configure_and_run_docker_container(
                args=args,
                docker_img='fake-registry/fake-service',
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )
Example #10
    def test_configure_and_run_docker_container_nvidia(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
    ):
        with mock.patch(
            "paasta_tools.cli.cmds.spark_run.clusterman_metrics", autospec=True
        ):
            spark_conf = {
                "spark.cores.max": "5",
                "spark.master": "mesos://spark.master",
                "spark.ui.port": "1234",
                "spark.app.name": "fake app",
            }
            args = mock.MagicMock(cmd="pyspark", nvidia=True)

            configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=spark_run.CLUSTER_MANAGER_MESOS,
                pod_template_path="unique-run",
            )

            args, kwargs = mock_run_docker_container.call_args
            assert kwargs["nvidia"]
            assert mock_send_and_calculate_resources_cost.called
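Keyword arguments passed to `MagicMock(...)` are set as attributes on the resulting object, so `args` above reports `nvidia=True` without building a full argparse namespace:

from unittest import mock

args = mock.MagicMock(cmd="pyspark", nvidia=True)
assert args.cmd == "pyspark" and args.nvidia is True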
Example #11
    def test_configure_and_run_docker_container(
        self,
        mock_time,
        mock_run_docker_container,
        mock_get_spark_config,
        mock_get_username,
        mock_pick_random_port,
        mock_os_path_exists,
        mock_get_aws_credentials,
    ):
        mock_pick_random_port.return_value = 123
        mock_get_username.return_value = "fake_user"
        mock_get_spark_config.return_value = {"spark.app.name": "fake_app"}
        mock_run_docker_container.return_value = 0
        mock_get_aws_credentials.return_value = ("id", "secret")

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False

        with mock.patch(
            "paasta_tools.utils.get_service_docker_registry",
            autospec=True,
            return_value="fake-registry",
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
            )

        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="paasta_spark_run_fake_user_123",
            volumes=[
                "/h1:/c1:ro",
                "/h2:/c2:ro",
                "/fake_dir:/spark_driver:rw",
                "/etc/passwd:/etc/passwd:ro",
                "/etc/group:/etc/group:ro",
                "/nail/home:/nail/home:rw",
            ],
            environment={
                "PAASTA_SERVICE": "fake_service",
                "PAASTA_INSTANCE": "fake_instance",
                "PAASTA_CLUSTER": "fake_cluster",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_DEPLOY_GROUP": "fake_cluster.fake_instance",
                "PAASTA_DOCKER_IMAGE": "fake_service:fake_sha",
                "PAASTA_LAUNCHED_BY": mock.ANY,
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_USER": "******",
                "SPARK_OPTS": "--conf spark.app.name=fake_app",
                "PAASTA_RESOURCE_CPUS": "0.25",
                "PAASTA_RESOURCE_DISK": "1024",
                "PAASTA_RESOURCE_MEM": "1024",
                "PAASTA_GIT_SHA": "fake_ser",
            },
            docker_img="fake-registry/fake-service",
            docker_cmd="pyspark --conf spark.app.name=fake_app",
            dry_run=True,
            nvidia=False,
        )
Example #12
    def test_configure_and_run_docker_driver_resource_limits_config(
        self,
        mock_get_history_url,
        mock_et_signalfx_url,
        mock_get_docker_cmd,
        mock_create_spark_config_str,
        mock_get_webui_url,
        mock_send_and_calculate_resources_cost,
        mock_run_docker_container,
        mock_get_username,
        cluster_manager,
        spark_args_volumes,
        expected_volumes,
    ):
        mock_get_username.return_value = "fake_user"
        spark_conf = {
            "spark.app.name": "fake_app",
            "spark.ui.port": "1234",
            "spark.driver.memory": "1g",
            "spark.driver.cores": "2",
            **spark_args_volumes,
        }
        mock_run_docker_container.return_value = 0

        args = mock.MagicMock()
        args.aws_region = "fake_region"
        args.cluster = "fake_cluster"
        args.cmd = "pyspark"
        args.work_dir = "/fake_dir:/spark_driver"
        args.dry_run = True
        args.mrjob = False
        args.nvidia = False
        args.enable_compact_bin_packing = False
        args.disable_aws_credential_env_variables = False
        args.cluster_manager = cluster_manager
        args.docker_cpu_limit = 3
        args.docker_memory_limit = "4g"
        with mock.patch.object(
            self.instance_config, "get_env_dictionary", return_value={"env1": "val1"}
        ):
            retcode = configure_and_run_docker_container(
                args=args,
                docker_img="fake-registry/fake-service",
                instance_config=self.instance_config,
                system_paasta_config=self.system_paasta_config,
                aws_creds=("id", "secret", "token"),
                spark_conf=spark_conf,
                cluster_manager=cluster_manager,
                pod_template_path="unique-run",
            )
        assert retcode == 0
        mock_run_docker_container.assert_called_once_with(
            container_name="fake_app",
            volumes=(
                expected_volumes
                + ["/fake_dir:/spark_driver:rw", "/nail/home:/nail/home:rw"]
            ),
            environment={
                "env1": "val1",
                "AWS_ACCESS_KEY_ID": "id",
                "AWS_SECRET_ACCESS_KEY": "secret",
                "AWS_SESSION_TOKEN": "token",
                "AWS_DEFAULT_REGION": "fake_region",
                "SPARK_OPTS": mock_create_spark_config_str.return_value,
                "SPARK_USER": "******",
                "PAASTA_INSTANCE_TYPE": "spark",
                "PAASTA_LAUNCHED_BY": mock.ANY,
            },
            docker_img="fake-registry/fake-service",
            docker_cmd=mock_get_docker_cmd.return_value,
            dry_run=True,
            nvidia=False,
            docker_memory_limit="4g",
            docker_cpu_limit=3,
        )
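The extra `cluster_manager`, `spark_args_volumes`, and `expected_volumes` parameters indicate this test is parametrized; the `@pytest.mark.parametrize` decorator itself is omitted from the listing. A hypothetical sketch of its shape (the concrete values below are illustrative guesses, not the real test data):

import pytest

@pytest.mark.parametrize(
    "cluster_manager,spark_args_volumes,expected_volumes",
    [
        # Illustrative rows only; the real volume keys and values differ.
        ("mesos", {"spark.mesos.executor.docker.volumes": "/h:/c:ro"}, ["/h:/c:ro"]),
        ("kubernetes", {}, []),
    ],
)
def test_shape(cluster_manager, spark_args_volumes, expected_volumes):
    assert isinstance(expected_volumes, list)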
Example #13
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_configuration,
    mock_get_username,
    mock_pick_random_port,
    mock_getcwd,
    mock_os_path_exists,
):
    mock_pick_random_port.return_value = 123
    mock_getcwd.return_value = 'fake_cwd'
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_configuration.return_value = {'APP_NAME': 'fake_app'}
    mock_run_docker_container.return_value = 0

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            'fake_cwd:/spark_client:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'APP_NAME': 'fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark',
        dry_run=True,
    )
Example #14
def test_configure_and_run_docker_container(
    mock_run_docker_container,
    mock_get_spark_conf_str,
    mock_get_username,
    mock_pick_random_port,
    mock_os_path_exists,
    mock_get_credentials,
):
    mock_pick_random_port.return_value = 123
    mock_get_username.return_value = 'fake_user'
    mock_get_spark_conf_str.return_value = '--conf spark.app.name=fake_app'
    mock_run_docker_container.return_value = 0
    mock_get_credentials.return_value = mock.MagicMock(access_key='id', secret_key='secret')

    args = mock.MagicMock()
    args.cluster = 'fake_cluster'
    args.cmd = 'pyspark'
    args.work_dir = '/fake_dir:/spark_driver'
    args.dry_run = True

    retcode = configure_and_run_docker_container(
        args=args,
        docker_img='fake-registry/fake-service',
        instance_config=InstanceConfig(
            cluster='fake_cluster',
            instance='fake_instance',
            service='fake_service',
            config_dict={
                'extra_volumes': [{
                    "hostPath": "/h1",
                    "containerPath": "/c1",
                    "mode": "RO",
                }],
            },
            branch_dict={'docker_image': 'fake_service:fake_sha'},
        ),
        system_paasta_config=SystemPaastaConfig(
            {
                'volumes': [{
                    "hostPath": "/h2",
                    "containerPath": "/c2",
                    "mode": "RO",
                }],
            },
            'fake_dir',
        ),
    )

    assert retcode == 0
    mock_run_docker_container.assert_called_once_with(
        container_name='paasta_spark_run_fake_user_123',
        volumes=[
            '/h1:/c1:ro',
            '/h2:/c2:ro',
            '/fake_dir:/spark_driver:rw',
            '/etc/passwd:/etc/passwd:ro',
            '/etc/group:/etc/group:ro',
        ],
        environment={
            'PAASTA_SERVICE': 'fake_service',
            'PAASTA_INSTANCE': 'fake_instance',
            'PAASTA_CLUSTER': 'fake_cluster',
            'PAASTA_DEPLOY_GROUP': 'fake_cluster.fake_instance',
            'PAASTA_DOCKER_IMAGE': 'fake_service:fake_sha',
            'AWS_ACCESS_KEY_ID': 'id',
            'AWS_SECRET_ACCESS_KEY': 'secret',
            'SPARK_USER': '******',
            'SPARK_OPTS': '--conf spark.app.name=fake_app',
        },
        docker_img='fake-registry/fake-service',
        docker_cmd='pyspark --conf spark.app.name=fake_app',
        dry_run=True,
    )
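The expected volume strings in these last two examples imply that each volume dict is rendered as `hostPath:containerPath:mode` with the mode lower-cased. A minimal sketch of that mapping (the helper name is hypothetical, not the paasta_tools API):

def format_volume(vol):
    # {"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"} -> "/h1:/c1:ro"
    return "{}:{}:{}".format(vol["hostPath"], vol["containerPath"], vol["mode"].lower())

assert format_volume({"hostPath": "/h1", "containerPath": "/c1", "mode": "RO"}) == "/h1:/c1:ro"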