示例#1
0
def test_container_environment(client, has_kerberos_enabled):
    """Inspect the environment and login identity seen inside a container.

    The single service dumps its environment, echoes the login user and
    touches a file on HDFS; the application logs are then checked for the
    expected variables.
    """
    service = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=[
            'env',
            'echo "LOGIN_ID=[$(whoami)]"',
            'hdfs dfs -touchz /user/testuser/test_container_permissions',
        ],
    )
    spec = skein.ApplicationSpec(
        name="test_container_permissions",
        queue="default",
        services={'service': service},
    )

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)

    logs = get_logs(app.id)

    # Entries expected regardless of the authentication mode.
    # NOTE(review): 124 is requested but 128 is reported -- presumably the
    # scheduler rounds the request up; confirm.
    always_present = (
        "USER=testuser",
        'SKEIN_APPMASTER_ADDRESS=',
        'SKEIN_APPLICATION_ID=%s' % app.id,
        'SKEIN_CONTAINER_ID=service_0',
        'SKEIN_RESOURCE_MEMORY=128',
        'SKEIN_RESOURCE_VCORES=1',
    )
    for entry in always_present:
        assert entry in logs

    # The login user and the presence of HADOOP_USER_NAME depend on kerberos
    if has_kerberos_enabled:
        login_line, expects_hadoop_user = "LOGIN_ID=[testuser]", False
    else:
        login_line, expects_hadoop_user = "LOGIN_ID=[yarn]", True
    assert login_line in logs
    assert ("HADOOP_USER_NAME" in logs) == expects_hadoop_user
示例#2
0
def test_webui_acls(client, has_kerberos_enabled, ui_users, checks):
    """Check that the web UI proxy honours the ``ui_users`` ACL.

    ``checks`` is an iterable of ``(user, ok)`` pairs: for each user the
    proxied page is requested and the response's ``ok`` flag must equal the
    expected value. Skipped when kerberos is enabled.
    """
    if has_kerberos_enabled:
        pytest.skip("Testing only implemented for simple authentication")

    sleeper = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=['sleep infinity'],
    )
    acls = skein.ACLs(enable=True, ui_users=ui_users)
    spec = skein.ApplicationSpec(
        name="test_webui_acls",
        queue="default",
        acls=acls,
        services={'sleeper': sleeper},
    )

    with run_application(client, spec=spec) as app:
        # Wait until exactly one container is up
        containers = wait_for_containers(app, 1, states=['RUNNING'])
        first = containers[0]
        assert first.state == 'RUNNING'
        assert first.service_name == 'sleeper'

        # Web UI of this application, reached through the proxy
        proxy_url = 'http://master.example.com:8088/proxy/%s' % app.id

        # Each user must be allowed or rejected as specified
        for user, ok in checks:
            page_url = proxy_url + "?user.name=%s" % user
            resp = get_page(page_url)
            assert resp.ok == ok

        app.shutdown()
示例#3
0
def test_security_specified(client):
    """Run an application whose master uses explicitly supplied credentials.

    Verifies that the application handle keeps the supplied security object,
    that a reconnect needs those credentials, and that the specification
    fetched from the application references the credentials as files with an
    ``hdfs`` source rather than as raw bytes.
    """
    credentials = skein.Security.new_credentials()
    master = skein.Master(security=credentials, script='sleep infinity')
    spec = skein.ApplicationSpec(name="test_security_specified",
                                 master=master)

    with run_application(client, spec=spec) as app:
        # Same object that was passed in, and not the client's own security
        assert app.security is credentials
        assert app.security != client.security

        spec2 = app.get_specification()

        # Smoketest: a connection made with the same credentials can talk
        with_credentials = client.connect(app.id, security=credentials)
        with_credentials.get_specification()

        # A connection made without them has improper credentials
        without_credentials = client.connect(app.id)
        with pytest.raises(skein.ConnectionError):
            without_credentials.get_specification()

        app.shutdown()

    remote = spec2.master.security
    assert remote.cert_bytes is None
    assert remote.key_bytes is None
    assert remote.cert_file.source.startswith('hdfs')
    assert remote.key_file.source.startswith('hdfs')
示例#4
0
    def _build_specification(self):
        """Build the ``skein.ApplicationSpec`` named ``'jupyterhub'``.

        The application consists of a master only: its script is rendered
        from ``script_template``, its resources come from ``mem_limit`` (in
        bytes) and ``cpu_limit``, and it gets freshly generated credentials.
        """
        rendered_script = self.script_template.format(
            prologue=self.prologue,
            singleuser_command=self.singleuser_command,
            epilogue=self.epilogue,
        )

        limits = skein.Resources(
            memory='%d b' % self.mem_limit,
            vcores=self.cpu_limit,
        )

        credentials = skein.Security.new_credentials()

        # Entries may be given either as plain dicts or as File objects
        localized = {}
        for name, entry in self.localize_files.items():
            if isinstance(entry, dict):
                entry = skein.File.from_dict(entry)
            localized[name] = entry

        master = skein.Master(
            resources=limits,
            files=localized,
            env=self.get_env(),
            script=rendered_script,
            security=credentials,
        )

        return skein.ApplicationSpec(
            name='jupyterhub',
            queue=self.queue,
            user=self.user.name,
            master=master,
            delegation_token_providers=self.delegation_token_providers,
        )
示例#5
0
文件: test_core.py 项目: lipaul/skein
def test_proxy_user(client):
    """Submit an application as proxy user ``alice`` and verify it is used.

    Checks that the specification reported by the application carries the
    proxy user, that its localized files live under that user's HDFS home,
    that logs can be fetched as that user, and that the staging directory
    checked at the end does not exist after the kill.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    # NOTE(review): the user literals in this test were masked as "******".
    # "alice" is required by the assertions on ``spec2`` below; using it for
    # the kill and log calls follows the "used throughout" comment -- confirm.
    proxy_user = "alice"

    service = skein.Service(resources=skein.Resources(memory=32, vcores=1),
                            script="sleep infinity")
    spec = skein.ApplicationSpec(name="test_proxy_user",
                                 user=proxy_user,
                                 services={"service": service})
    with run_application(client, spec=spec) as app:
        spec2 = app.get_specification()
        client.kill_application(app.id, user=proxy_user)

    # Alice used throughout process
    assert spec2.user == 'alice'
    for fil in spec2.services['service'].files.values():
        assert fil.source.startswith(
            'hdfs://master.example.com:9000/user/alice')

    # Can get logs as user
    logs = get_logs(client, app.id, user=proxy_user)
    assert app.id in logs
    assert "application.master.log" in logs

    # Application directory deleted after kill
    fs = hdfs.connect()
    assert not fs.exists("/user/testuser/.skein/%s" % app.id)
示例#6
0
def test_fail_on_container_failure(client, with_restarts):
    """Check that a failing container fails the whole application.

    Two instances of the ``test`` service are started. The script keeps the
    container whose ``SKEIN_CONTAINER_ID`` is ``test_0`` sleeping and makes
    every other container exit with status 1. With ``with_restarts`` the
    service allows two restarts, so ``test_2`` and ``test_3`` must show up in
    the logs; without it they must not.
    """
    script = ('if [[ "$SKEIN_CONTAINER_ID" != "test_0" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  sleep infinity\n'
              'fi')

    service = skein.Service(instances=2,
                            max_restarts=2 if with_restarts else 0,
                            resources=skein.Resources(memory=32, vcores=1),
                            script=script)
    spec = skein.ApplicationSpec(name="test_fail_on_container_failure",
                                 services={'test': service})

    with run_application(client, spec=spec) as app:
        # The final state has to be asserted; a bare comparison checks nothing
        assert wait_for_completion(client, app.id) == "FAILED"

    logs = get_logs(app.id)
    assert "test_0" in logs
    assert "test_1" in logs
    assert ("test_2" in logs) == with_restarts
    assert ("test_3" in logs) == with_restarts
    assert "test_4" not in logs
示例#7
0
def test_memory_limit_exceeded(kind, client):
    """Check that exceeding the memory limit fails the application.

    When ``kind`` is ``'master'`` the over-allocating script runs as the
    application master; otherwise it runs as a service. The expected log
    text differs between the two cases, and for the master the application
    report diagnostics are checked as well.
    """
    limit = skein.Resources(memory=128, vcores=1)
    # Allocate noticeably more memory than the 128 MB limit
    hog = 'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'

    runs_in_master = kind == 'master'
    if runs_in_master:
        master = skein.Master(resources=limit, script=hog)
        services = None
        search_txt = "memory limit"
    else:
        master = None
        services = {'service': skein.Service(resources=limit, script=hog)}
        search_txt = "memory used"

    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded_%s" % kind,
        queue="default",
        master=master,
        services=services,
    )
    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == "FAILED"

    assert search_txt in get_logs(app_id)

    if runs_in_master:
        diagnostics = client.application_report(app_id).diagnostics
        assert 'memory limit' in diagnostics
示例#8
0
def test_client_errors_nicely_if_not_logged_in(security, not_logged_in):
    """Every client call must raise a ``DriverError`` that mentions ``kinit``.

    A fixed application id and a never-to-be-run specification are used as
    arguments for the calls that need them.
    """
    appid = 'application_1526134340424_0012'

    service = skein.Service(resources=skein.Resources(memory=32, vcores=1),
                            script='env')
    spec = skein.ApplicationSpec(name="should_never_get_to_run",
                                 queue="default",
                                 services={'service': service})

    # (method name, positional arguments) for each client call under test
    calls = [
        ('get_applications', ()),
        ('get_nodes', ()),
        ('get_queue', ('default',)),
        ('get_child_queues', ('default',)),
        ('get_all_queues', ()),
        ('application_report', (appid,)),
        ('connect', (appid,)),
        ('move_application', (appid, 'default')),
        ('kill_application', (appid,)),
        ('submit', (spec,)),
    ]

    with skein.Client(security=security) as client:
        for method_name, call_args in calls:
            with pytest.raises(skein.DriverError) as exc:
                getattr(client, method_name)(*call_args)
            assert 'kinit' in str(exc.value)
示例#9
0
def test_node_locality(client, strict):
    """Check that node/rack locality settings round-trip through the app.

    With ``strict`` a real worker host is requested and locality is not
    relaxed; otherwise made-up node and rack names are requested with relaxed
    locality. In both cases one container must reach ``RUNNING`` and the
    specification read back must carry the same settings.
    """
    relax_locality = not strict
    if strict:
        nodes, racks = ['worker.example.com'], []
    else:
        nodes, racks = ['not.a.real.host.name'], ['not.a.real.rack.name']

    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='sleep infinity',
        nodes=nodes,
        racks=racks,
        relax_locality=relax_locality,
    )
    spec = skein.ApplicationSpec(
        name="test_node_locality",
        queue="default",
        services={"service": service},
    )

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['RUNNING'])
        spec2 = app.get_specification()
        app.shutdown()

    reported = spec2.services['service']
    assert reported.nodes == nodes
    assert reported.racks == racks
    assert reported.relax_locality == relax_locality
示例#10
0
def test_master_driver_shutdown_sequence(kind, master_cmd, service_cmd,
                                         client, tmpdir):
    """Check the final state for each driver/service outcome combination.

    ``kind`` names the scenario; scenarios ending in ``succeeds`` expect
    ``SUCCEEDED``, all others ``FAILED``. Only ``service_succeeds`` needs a
    live connection, because there the application stays up until it is shut
    down explicitly.
    """
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script=service_cmd,
    )
    spec = skein.ApplicationSpec(
        name="test_master_driver_shutdown_sequence_%s" % kind,
        master=skein.Master(script=master_cmd),
        services={'service': service},
    )

    expected = 'SUCCEEDED' if kind.endswith('succeeds') else 'FAILED'

    if kind != 'service_succeeds':
        # service_fails results in immediate failure
        # driver_succeeds results in immediate success
        # driver_fails results in immediate failure
        with run_application(client, spec=spec, connect=False) as app_id:
            assert wait_for_completion(client, app_id) == expected
        return

    with run_application(client, spec=spec) as app:
        wait_for_containers(app, 1, states=['SUCCEEDED'])
        assert len(app.get_containers()) == 0
        # App hangs around until driver completes
        app.shutdown()
        assert wait_for_completion(client, app.id) == expected
示例#11
0
def test_allow_failures_max_restarts(client, allow_failures):
    """Check how ``allow_failures`` interacts with ``max_restarts``.

    The service always exits with status 1 and may be restarted twice. With
    ``allow_failures`` the application must survive the three failures and
    succeed after a manual shutdown; without it the application must fail.
    """
    app_name = ("test_max_restarts_allow_failures_%s"
                % str(allow_failures).lower())
    failing_service = skein.Service(
        instances=1,
        max_restarts=2,
        allow_failures=allow_failures,
        resources=skein.Resources(memory=128, vcores=1),
        script="exit 1",
    )
    spec = skein.ApplicationSpec(
        name=app_name,
        master=skein.Master(script="sleep infinity"),
        services={'myservice': failing_service},
    )

    with run_application(client, spec=spec) as app:
        if not allow_failures:
            # Service failed 3 times and then terminates
            assert wait_for_completion(client, app.id) == 'FAILED'
        else:
            # Service failed 3 times, doesn't keep trying to run more
            wait_for_containers(app, 3, states=['FAILED'])
            # Check still running fine after 3 failures
            time.sleep(0.5)
            app.get_specification()
            # Shutdown manually
            app.shutdown()
            assert wait_for_completion(client, app.id) == 'SUCCEEDED'
示例#12
0
def launch_remote_check(file: str) -> Tuple[bool, str]:
    """Run ``check_hadoop_env.py`` in a remote container and report its result.

    The current environment is uploaded as a PEX archive, a single-instance
    skein service is submitted that runs this file from that archive, and the
    value stored under the ``result`` key of the application's key-value
    store is awaited.

    Args:
        file: value forwarded to the remote script's ``--file`` option.

    Returns:
        A pair ``(succeeded, app_id)`` where ``succeeded`` is True when the
        remote result decodes to the string ``"True"``.
    """
    # Imported locally so the block does not depend on a module-level import
    import uuid

    logging.info('Launching remote check')
    zip_hdfs, _ = cluster_pack.upload_env(packer=cluster_pack.PEX_PACKER)
    archive_name = os.path.basename(zip_hdfs)
    with skein.Client() as client:
        files = {
            archive_name: zip_hdfs,
            'check_hadoop_env.py': __file__,
        }
        editable_packages = cluster_pack.get_editable_requirements()
        if 'tf_yarn' in editable_packages:
            tf_yarn_zip = cluster_pack.zip_path(editable_packages['tf_yarn'],
                                                False)
            logger.info(f"zip path for editable tf_yarn is {tf_yarn_zip}")
            files.update({'tf_yarn': tf_yarn_zip})
        service = skein.Service(
            script=f'./{archive_name} check_hadoop_env.py --file {file}',
            resources=skein.Resources(2 * 1024, 1),
            env={
                # Must be an f-string: without the prefix the braces were
                # passed through verbatim instead of yielding a unique path
                'PEX_ROOT': f'/tmp/{uuid.uuid4()}/',
                'PYTHONPATH': '.:',
            },
            files=files,
            instances=1)
        spec = skein.ApplicationSpec(
            {'HADOOP_ENV_CHECKER': service},
            acls=skein.model.ACLs(enable=True, view_users=['*']),
        )
        app = client.submit_and_connect(spec)

        logging.info('Remote check started')
        result = app.kv.wait('result').decode()
        app_id = app.id
        app.shutdown()
        return result == "True", app_id
示例#13
0
def _setup_skein_cluster(pyenvs: Dict[NodeLabel, PythonEnvDescription],
                         task_specs: Dict[str, TaskSpec] = TASK_SPEC_NONE,
                         *,
                         skein_client: skein.Client = None,
                         files: Dict[str, str] = None,
                         env: Dict[str, str] = None,
                         queue: str = "default",
                         acls: ACLs = None,
                         file_systems: List[str] = None,
                         log_conf_file: str = None,
                         standalone_client_mode: bool = False) -> SkeinCluster:
    """Submit a skein application with one service per task type.

    Args:
        pyenvs: Python environment to ship, keyed by node label.
        task_specs: per-task-type specification (memory, vcores, instances,
            label, termination timeout).
        skein_client: client used for submission; a new ``skein.Client`` is
            created when omitted.
        files: extra files handed to ``_setup_task_env``.
        env: extra environment variables handed to ``_setup_task_env``;
            defaults to an empty mapping created per call.
        queue: queue the application is submitted to.
        acls: access-control lists for the application.
        file_systems: file systems passed to the application spec.
        log_conf_file: logging configuration passed to ``gen_task_cmd``.
        standalone_client_mode: forwarded to ``_setup_cluster_tasks``.

    Returns:
        A ``SkeinCluster`` bundling the client, the application handle, the
        task instances, the cluster spec, the event-listener thread and the
        shared events mapping.
    """
    # A fresh dict per call: a ``{}`` default would be shared between calls
    if env is None:
        env = {}

    # These JVM flags are prepended to any existing value on every call
    os.environ["JAVA_TOOL_OPTIONS"] = \
        "-XX:ParallelGCThreads=1 -XX:CICompilerCount=2 "\
        f"{os.environ.get('JAVA_TOOL_OPTIONS', '')}"

    with tempfile.TemporaryDirectory() as tempdir:
        task_files, task_env = _setup_task_env(tempdir, files, env)
        services = {}
        for task_type, task_spec in task_specs.items():
            pyenv = pyenvs[task_spec.label]
            # Each service gets its own copy so additions do not leak
            service_env = task_env.copy()
            if task_spec.termination_timeout_seconds >= 0:
                _add_to_env(service_env, "SERVICE_TERMINATION_TIMEOUT_SECONDS",
                            str(task_spec.termination_timeout_seconds))
            services[task_type] = skein.Service(
                script=gen_task_cmd(pyenv, log_conf_file),
                resources=skein.model.Resources(task_spec.memory,
                                                task_spec.vcores),
                max_restarts=0,
                instances=task_spec.instances,
                node_label=task_spec.label.value,
                files={
                    **task_files, pyenv.dest_path: pyenv.path_to_archive
                },
                env=service_env)

        spec = skein.ApplicationSpec(services,
                                     queue=queue,
                                     acls=acls,
                                     file_systems=file_systems)

        if skein_client is None:
            skein_client = skein.Client()

        task_instances = [(task_type, task_spec.instances)
                          for task_type, task_spec in task_specs.items()]
        events: Dict[str, Dict[str, str]] = \
            {task: {} for task in iter_tasks(task_instances)}
        app = skein_client.submit_and_connect(spec)
        # Start a thread which collects all events posted by all tasks in kv store
        event_listener = Thread(target=_aggregate_events,
                                args=(app.kv, events))
        event_listener.start()

        cluster_spec = _setup_cluster_tasks(task_instances, app,
                                            standalone_client_mode)

        return SkeinCluster(skein_client, app, task_instances, cluster_spec,
                            event_listener, events)
示例#14
0
def test_hadoop3_resource(client):
    """Submitting a spec that requests a GPU must raise ``ValueError``.

    The expected error text depends on the module-level ``HADOOP3`` flag.
    """
    resources = skein.Resources(memory='32 MiB', vcores=1, gpus=1)
    spec = skein.ApplicationSpec(
        name="test_hadoop3_resources",
        master=skein.Master(resources=resources, script="sleep infinity"))

    with pytest.raises(ValueError) as exc:
        client.submit(spec)

    message = str(exc.value)
    if HADOOP3:
        assert "Resource 'yarn.io/gpu'" in message
    else:
        # Compare against the message: a bare non-empty string is always true
        assert "Custom resources not supported" in message
示例#15
0
def test_file_systems(client):
    """Run a service that touches an HDFS file with ``file_systems`` set.

    The application must finish in the ``SUCCEEDED`` state.
    """
    touch_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='hdfs dfs -touchz /user/testuser/test_file_systems',
    )
    spec = skein.ApplicationSpec(
        name="test_file_systems",
        queue="default",
        services={'service': touch_service},
        file_systems=["hdfs://master.example.com:9000"],
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'
示例#16
0
def create_skein_app():
    """Build the skein spec that runs ``distributed.py --server`` twice.

    Returns:
        A ``skein.ApplicationSpec`` for the ``dev`` queue with one service,
        keyed by ``NODE_NAME``, that ships ``example.pex`` and this file and
        starts two instances.
    """
    # Imported locally so the block does not depend on a module-level import
    import uuid

    service = skein.Service(
        commands=['./example.pex distributed.py --server'],
        resources=skein.Resources(2 * 1024, 1),
        # Must be an f-string: without the prefix the braces were passed
        # through verbatim instead of yielding a unique path
        env={'PEX_ROOT': f'/tmp/{uuid.uuid4()}/'},
        files={
            'example.pex': 'example.pex',
            'distributed.py': __file__,
        },
        instances=2)
    spec = skein.ApplicationSpec({NODE_NAME: service}, queue='dev')
    return spec
示例#17
0
def test_add_container(client):
    """Add containers at runtime, with and without environment overrides.

    The service starts with zero instances. Its script echoes ``MYENV`` and
    ``MYENV2`` and exits with status 1 only when ``MYENV`` is ``bar``. The
    first added container uses the service defaults; the second overrides
    ``MYENV`` with ``bar``, which must eventually fail the application.
    """
    script = ('echo "$SKEIN_CONTAINER_ID - MYENV=$MYENV"\n'
              'echo "$SKEIN_CONTAINER_ID - MYENV2=$MYENV2"\n'
              'if [[ "$MYENV" == "bar" ]]; then\n'
              '  exit 1\n'
              'else\n'
              '  exit 0\n'
              'fi')

    service = skein.Service(
        instances=0,
        resources=skein.Resources(memory=32, vcores=1),
        env={'MYENV': 'foo', 'MYENV2': 'baz'},
        max_restarts=1,
        script=script,
    )
    spec = skein.ApplicationSpec(
        name="test_add_container",
        master=skein.Master(script="sleep infinity"),
        services={'test': service},
    )

    with run_application(client, spec=spec) as app:
        # Add a container without any overrides
        first = app.add_container('test')
        assert first.instance == 0
        wait_for_containers(app, 1, states=['RUNNING', 'SUCCEEDED'])

        # Non-existent service
        with pytest.raises(ValueError):
            app.add_container('foobar')

        # Add a container with an override for MYENV
        second = app.add_container('test', {'MYENV': 'bar'})
        assert second.instance == 1

        # The new env var triggers a failure, should fail twice,
        # then fail the whole application
        assert wait_for_completion(client, app.id) == 'FAILED'

    logs = get_logs(app.id)

    # test_0 ran with the defaults; test_1 and its restart test_2 kept the
    # MYENV override while MYENV2 stayed at the service default
    expected_lines = (
        "test_0 - MYENV=foo",
        "test_0 - MYENV2=baz",
        "test_1 - MYENV=bar",
        "test_1 - MYENV2=baz",
        "test_2 - MYENV=bar",
        "test_2 - MYENV2=baz",
    )
    for line in expected_lines:
        assert line in logs

    assert "test_3" not in logs
示例#18
0
def test_file_systems(client):
    """Run a service that touches an HDFS file with ``file_systems`` set.

    Success of the application is checked by ``wait_for_success``.
    """
    touch_service = skein.Service(
        resources=skein.Resources(memory=124, vcores=1),
        commands=['hdfs dfs -touchz /user/testuser/test_file_systems'],
    )
    spec = skein.ApplicationSpec(name="test_file_systems",
                                 queue="default",
                                 services={'service': touch_service},
                                 file_systems=[
                                     "hdfs://master.example.com:9000"
                                 ])

    with run_application(client, spec=spec) as app:
        wait_for_success(client, app.id)
示例#19
0
    def _build_specification(self, cluster, cert_path, key_path):
        """Build the ``dask-gateway`` application spec for ``cluster``.

        The master runs the scheduler script and a ``dask.worker`` service,
        initially with zero instances, runs the worker script. Both receive
        the configured localized files plus the certificate and key under
        the names ``dask.crt`` and ``dask.pem``.
        """
        config = cluster.config

        # Plain dicts are accepted alongside File objects
        files = {}
        for name, entry in config.localize_files.items():
            if isinstance(entry, dict):
                entry = skein.File.from_dict(entry)
            files[name] = entry
        files["dask.crt"] = cert_path
        files["dask.pem"] = key_path

        scheduler_cmd = " ".join(self.get_scheduler_command(cluster))
        worker_args = self.get_worker_command(
            cluster,
            worker_name="$DASK_GATEWAY_WORKER_NAME",
            scheduler_address="$DASK_GATEWAY_SCHEDULER_ADDRESS",
        )
        worker_cmd = " ".join(worker_args)

        # Each script is the configured setup snippet followed by the command
        scheduler_script = f"{config.scheduler_setup}\n{scheduler_cmd}"
        worker_script = f"{config.worker_setup}\n{worker_cmd}"

        master = skein.Master(
            security=self._get_security(cluster),
            resources=skein.Resources(
                memory="%d b" % config.scheduler_memory,
                vcores=config.scheduler_cores,
            ),
            files=files,
            env=self.get_scheduler_env(cluster),
            script=scheduler_script,
        )

        worker_service = skein.Service(
            resources=skein.Resources(
                memory="%d b" % config.worker_memory,
                vcores=config.worker_cores,
            ),
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=files,
            env=self.get_worker_env(cluster),
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=config.queue,
            user=cluster.username,
            master=master,
            services={"dask.worker": worker_service},
        )
示例#20
0
def test_set_log_level(client):
    """With the master log level set to ``debug``, logs must contain DEBUG."""
    service = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            script='ls')
    # Named after this test; the name previously duplicated the one used by
    # the custom log4j properties test, making the two runs indistinguishable
    spec = skein.ApplicationSpec(name="test_set_log_level",
                                 queue="default",
                                 master=skein.Master(log_level='debug'),
                                 services={'service': service})

    with run_application(client, spec=spec) as app:
        assert wait_for_completion(client, app.id) == 'SUCCEEDED'

    logs = get_logs(app.id)
    assert 'DEBUG' in logs
示例#21
0
def test_memory_limit_exceeded(client):
    """A service that over-allocates memory must fail the application.

    The logs are expected to mention ``memory used``.
    """
    # Allocate noticeably more memory than the 128 MB limit
    hog_command = (
        'python -c "b = bytearray(int(256e6)); import time; time.sleep(10)"'
    )
    service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        commands=[hog_command],
    )
    spec = skein.ApplicationSpec(
        name="test_memory_limit_exceeded",
        queue="default",
        services={"service": service},
    )

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == "FAILED"

    assert "memory used" in get_logs(app.id)
 def launch_skein():
     """Submit a one-service skein app that cats a file on HDFS, print its logs.

     ``filepath_on_hdfs`` is read from the enclosing scope. After submission
     the function waits for the application to finish and prints every log
     entry returned by ``skein_launcher.get_application_logs``.
     """
     with skein.Client() as client:
         # The literal's whitespace is part of the submitted script
         script = f'''
                 set -x
                 hdfs dfs -cat {filepath_on_hdfs}
             '''
         service = skein.Service(
             resources=skein.model.Resources("1 GiB", 1),
             script=script,
         )
         spec = skein.ApplicationSpec(services={"service": service})

         app_id = client.submit(spec)
         skein_launcher.wait_for_finished(client, app_id)

         # NOTE(review): meaning of the trailing 2 is not visible here
         logs = skein_launcher.get_application_logs(client, app_id, 2)
         for key in logs:
             value = logs[key]
             print(f"skein logs:{key} {value}")
示例#23
0
def test_move_application(client):
    """Exercise ``move_application`` for the success and failure cases.

    Covers a successful move, a non-leaf queue, a missing queue, an
    application that is already shut down, an unknown application id and a
    malformed application id. All failures must raise ``ValueError``.
    """
    spec = skein.ApplicationSpec(
        name="test_move_application",
        queue="default",
        master=skein.Master(script="sleep infinity"),
    )

    def assert_good_message(msg):
        # Ensure message doesn't contain traceback
        assert 'org.apache.hadoop' not in str(msg)

    def failed_move(app_id, queue):
        # Attempt the move, require a ValueError and hand it back
        with pytest.raises(ValueError) as exc:
            client.move_application(app_id, queue)
        return exc.value

    def current_queue(app_id):
        return client.application_report(app_id).queue

    with run_application(client, spec=spec) as app:
        assert current_queue(app.id) == "default"

        # Successful move
        client.move_application(app.id, "apples")
        assert current_queue(app.id) == "apples"

        # Not a leaf queue
        err = failed_move(app.id, "fruit")
        assert 'Leaf' in str(err)
        assert_good_message(err)

        # Queue doesn't exist
        err = failed_move(app.id, "missing")
        assert "doesn't exist" in str(err)
        assert_good_message(err)

        app.shutdown()

    # App already shutdown
    err = failed_move(app.id, "default")
    assert "cannot be moved" in str(err)
    assert_good_message(err)

    # App doesn't exist
    missing_appid = 'application_1526134340424_0012'
    err = failed_move(missing_appid, "default")
    # This error message is different in Hadoop 3
    text = str(err)
    assert "absent" in text or "doesn't exist" in text
    assert_good_message(err)

    # Invalid application id
    err = failed_move("oh no", "default")
    assert "Invalid" in str(err)
示例#24
0
def test_proxy_user_no_permissions(client):
    """Submitting as a user that may not be impersonated must fail.

    The resulting ``DriverError`` message has to name both ``testuser`` and
    the requested proxy user ``bob``.
    """
    # NOTE(review): the user literal was masked as "******" in this copy;
    # "bob" is what the final assertion on the error message requires.
    service = skein.Service(resources=skein.Resources(memory=128, vcores=1),
                            commands=['env'])
    spec = skein.ApplicationSpec(name="test_proxy_user_no_permissions",
                                 user="bob",
                                 services={'service': service})

    # No permission to submit as user
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    exc_msg = str(exc.value)
    assert 'testuser' in exc_msg
    assert 'bob' in exc_msg
示例#25
0
def test_master_driver_foo(client, tmpdir):
    """Run a master-only app and check its env and localized file in the logs.

    The master script lists the working directory and dumps the environment,
    so both the ``FOO=BAR`` variable and the ``myfile`` entry must show up.
    """
    local_path = str(tmpdir.join("dummy-file"))
    with open(local_path, 'w') as handle:
        handle.write('foobar')

    master = skein.Master(
        script='ls\nenv',
        env={'FOO': 'BAR'},
        files={'myfile': local_path},
    )
    spec = skein.ApplicationSpec(name="test_master_driver", master=master)

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == 'SUCCEEDED'

    logs = get_logs(app_id)
    for expected in ('FOO=BAR', 'myfile'):
        assert expected in logs
示例#26
0
def test_custom_log4j_properties(client, tmpdir):
    """Run with a custom log4j configuration and look for its marker.

    The configuration text comes from the module-level
    ``custom_log4j_properties``; the logs must contain
    ``CUSTOM-LOG4J-SUCCEEDED``.
    """
    configpath = str(tmpdir.join("log4j.properties"))

    ls_service = skein.Service(
        resources=skein.Resources(memory=128, vcores=1),
        script='ls',
    )
    spec = skein.ApplicationSpec(
        name="test_custom_log4j_properties",
        queue="default",
        master=skein.Master(log_config=configpath),
        services={'service': ls_service},
    )

    # The file is written after the spec is built, before submission
    with open(configpath, 'w') as config_file:
        config_file.write(custom_log4j_properties)

    with run_application(client, spec=spec) as app:
        final_state = wait_for_completion(client, app.id)
        assert final_state == 'SUCCEEDED'

    assert 'CUSTOM-LOG4J-SUCCEEDED' in get_logs(app.id)
示例#27
0
def test_retries_succeeds(client):
    """An application allowed two attempts succeeds on its second one.

    The master script is rendered to succeed on attempt ``02``. Afterwards
    the staging directory checked at the end must not exist.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    master_script = test_retries_script_template.format(succeed_on='02')
    spec = skein.ApplicationSpec(
        name="test_application_retries_succeeds",
        max_attempts=2,
        master=skein.Master(script=master_script),
    )

    with run_application(client, spec=spec, connect=False) as app_id:
        assert wait_for_completion(client, app_id) == 'SUCCEEDED'

    logs = get_logs(app_id)
    expected_fragments = (
        'Failing on other attempts',
        'Application attempt 1 out of 2 failed, will retry',
        'Succeeding on attempt 02',
    )
    for fragment in expected_fragments:
        assert fragment in logs

    fs = hdfs.connect()
    staging_dir = "/user/testuser/.skein/%s" % app_id
    assert not fs.exists(staging_dir)
示例#28
0
    def _build_specification(self, cluster_info, cert_path, key_path):
        """Build the ``dask-gateway`` application spec for ``cluster_info``.

        The master runs the scheduler script and a ``dask.worker`` service,
        initially with zero instances, runs the worker script. Both share
        the same environment and the same files, which include the
        certificate and key under ``dask.crt`` and ``dask.pem``.
        """
        # Plain dicts are accepted alongside File objects
        files = {}
        for name, entry in self.localize_files.items():
            if isinstance(entry, dict):
                entry = skein.File.from_dict(entry)
            files[name] = entry
        files["dask.crt"] = cert_path
        files["dask.pem"] = key_path

        env = self.get_env(cluster_info)

        # Each script is the setup snippet followed by the command
        scheduler_script = "\n".join(
            (self.scheduler_setup, self.scheduler_command))
        worker_script = "\n".join((self.worker_setup, self.worker_command))

        scheduler_resources = skein.Resources(
            memory="%d b" % self.scheduler_memory,
            vcores=self.scheduler_cores,
        )
        master = skein.Master(
            security=self._get_security(cluster_info),
            resources=scheduler_resources,
            files=files,
            env=env,
            script=scheduler_script,
        )

        worker_resources = skein.Resources(
            memory="%d b" % self.worker_memory,
            vcores=self.worker_cores,
        )
        worker_service = skein.Service(
            resources=worker_resources,
            instances=0,
            max_restarts=0,
            allow_failures=True,
            files=files,
            env=env,
            script=worker_script,
        )

        return skein.ApplicationSpec(
            name="dask-gateway",
            queue=self.queue,
            user=cluster_info.username,
            master=master,
            services={"dask.worker": worker_service},
        )
示例#29
0
def test_retries_fails(client):
    """An application that only succeeds on attempt ``03`` must fail.

    Ten attempts are requested, but the logs must show exactly two failed
    attempts. Afterwards the staging directory checked at the end must not
    exist.
    """
    hdfs = pytest.importorskip('pyarrow.hdfs')

    # Global maximum is 2, checks that appmaster uses 2 instead of 10
    requested_attempts = 10

    master_script = test_retries_script_template.format(succeed_on='03')
    spec = skein.ApplicationSpec(
        name="test_application_retries_fails",
        max_attempts=requested_attempts,
        master=skein.Master(script=master_script),
    )

    with run_application(client, spec=spec, connect=False) as app_id:
        final_state = wait_for_completion(client, app_id)
        assert final_state == 'FAILED'

    logs = get_logs(app_id)
    assert logs.count('Failing on other attempts') == 2
    assert 'Application attempt 1 out of 2 failed' in logs

    fs = hdfs.connect()
    staging_dir = "/user/testuser/.skein/%s" % app_id
    assert not fs.exists(staging_dir)
示例#30
0
def test_proxy_user_no_permissions(client, hadoop3):
    """Submitting as a user that may not be impersonated must fail.

    The resulting ``DriverError`` message has to name both ``testuser`` and
    the requested proxy user ``bob``. Skipped on Hadoop 3.
    """
    if hadoop3:
        pytest.skip("Lack of proxyuser permissions causes "
                    "yarnclient to hang in hadoop3")

    # NOTE(review): the user literal was masked as "******" in this copy;
    # "bob" is what the final assertion on the error message requires.
    service = skein.Service(resources=skein.Resources(memory=32, vcores=1),
                            script='env')
    spec = skein.ApplicationSpec(name="test_proxy_user_no_permissions",
                                 user="bob",
                                 services={'service': service})

    # No permission to submit as user
    with pytest.raises(skein.DriverError) as exc:
        client.submit(spec)

    exc_msg = str(exc.value)
    assert 'testuser' in exc_msg
    assert 'bob' in exc_msg