示例#1
0
def test_environment():
    """Adds a cluster, then checks it survives a save/load round trip.

        The reloaded cluster must compare equal to the original one, while
        being a distinct object."""
    user = USER_2
    environment_path = get_test_environment_file(user=user)
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(clear_environment(user))
        enter(set_password(get_test_user_password(user)))

        # A cleared environment contains no clusters.
        assert show_clusters() == {}

        added = add_cluster(name=TEST_CLUSTER,
                            user=user,
                            host='localhost',
                            port=2222)

        listed = show_clusters()
        assert show_cluster(name=TEST_CLUSTER) is added
        assert len(show_clusters()) == 1
        assert listed[TEST_CLUSTER] == added
        assert added.name == TEST_CLUSTER

        try:
            save_environment(path=environment_path)
            # Load the saved file into a freshly cleared environment.
            with clear_environment(user):
                assert show_clusters() == {}
                load_environment(path=environment_path)
                reloaded = show_cluster(name=TEST_CLUSTER)
                assert reloaded is not added
                assert reloaded == added
        finally:
            os.remove(environment_path)
示例#2
0
def test_environment_create_modify_save_load():
    """Saves a default and a modified config, then reloads each in turn.

        Both saved files are compared line by line with the expected
        contents, and both are removed at the end."""
    user = USER_19
    default_path = get_test_environment_file(user=user)
    modified_path = get_test_environment_file(user=user) + '_2'

    def read_lines(path):
        """Returns the lines of the file, without line endings."""
        with open(path, 'r') as saved_file:
            return saved_file.read().splitlines()

    with ExitStack() as stack:
        stack.enter_context(clear_environment(user))

        assert show_clusters() == {}

        add_cluster(name=TEST_CLUSTER, user=user, host='localhost')

        check_config_is_default(config=show_cluster(TEST_CLUSTER).config,
                                user=user)
        try:
            save_environment(path=default_path)
            default_lines = read_lines(default_path)

            pprint(default_lines)
            assert default_lines == get_default_config_contents(user=user)

            # Change every setting checked by check_config_is_modified.
            modified = show_cluster(TEST_CLUSTER).config
            modified.host = 'localhost2'
            modified.port = 2222
            modified.user = '******'
            modified.auth = AuthMethod.PUBLIC_KEY
            modified.key = './fake-key'
            modified.install_key = False
            modified.disable_sshd = True
            modified.setup_actions.jupyter = ['abc']
            modified.setup_actions.dask = ['abc', 'def']
            modified.scratch = '$HOME2'
            modified.use_jupyter_lab = False
            set_log_level(logging.INFO)

            check_config_is_modified(config=modified)

            save_environment(path=modified_path)
            modified_lines = read_lines(modified_path)

            pprint(modified_lines)
            assert modified_lines == get_modified_config_contents()

            # Loading the first file brings the defaults back.
            load_environment(default_path)
            check_config_is_default(
                config=show_cluster(TEST_CLUSTER).config,
                user=user)

            # Loading the second file applies the modifications again.
            load_environment(modified_path)
            check_config_is_modified(
                config=show_cluster(TEST_CLUSTER).config)

        finally:
            # The second file is removed even if removing the first fails.
            try:
                os.remove(default_path)
            finally:
                os.remove(modified_path)
示例#3
0
def test_node_tunnel_fall_back_when_local_port_taken():
    """Checks that a tunnel will fall back to a different port if the
        requested local port is already taken by another tunnel."""
    user = USER_53
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        first_node = allocation[0]
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        remote_port = get_free_remote_port(node=first_node)
        local_port = get_free_local_port()

        # The first tunnel gets exactly the requested local port.
        first_tunnel = first_node.tunnel(there=remote_port, here=local_port)
        enter(close_tunnel_on_exit(first_tunnel))
        assert first_tunnel.here == local_port

        # The same request again must end up on another local port.
        second_tunnel = first_node.tunnel(there=remote_port, here=local_port)
        enter(close_tunnel_on_exit(second_tunnel))
        assert second_tunnel.here != local_port
示例#4
0
def check_remote_key_and_node_access(stack: ExitStack, user: str):
    """Checks which remote key files contain the public key, and that
        the access node and an allocated node run commands as the user.

        :param stack: Exit stack that receives the allocation cancellation.

        :param user: Expected output of `whoami` on the nodes.

    """
    key_value = get_public_key_value()
    grep_authorized_keys = \
        "grep '{public_key_value}' ~/.ssh/authorized_keys".format(
            public_key_value=key_value)
    grep_idact_keys = \
        "grep '{public_key_value}' ~/.ssh/authorized_keys.idact".format(
            public_key_value=key_value)

    testing_cluster = show_cluster(name=TEST_CLUSTER)
    access_node = testing_cluster.get_access_node()
    with set_password(get_test_user_password(user)):
        assert access_node.run('whoami') == user
    # Repeated outside of the password context.
    assert access_node.run('whoami') == user

    access_node.run(grep_authorized_keys)

    # Before any allocation, the same lookup in the idact key file fails.
    with pytest.raises(RuntimeError):
        access_node.run(grep_idact_keys)

    allocation = testing_cluster.allocate_nodes(
        nodes=2,
        cores=1,
        memory_per_node=MiB(100),
        walltime=Walltime(minutes=30))
    stack.enter_context(cancel_on_exit(allocation))
    print(allocation)

    allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
    # After waiting for the allocation, the lookup succeeds.
    access_node.run(grep_idact_keys)

    # Access to node without password works.
    assert allocation[0].run('whoami') == user

    check_direct_access_from_access_node_does_not_work(allocation[0])
示例#5
0
def test_able_to_sync_dask():
    """Pushes a Dask deployment and checks that the pulled copy has its
        own, reachable diagnostics addresses."""
    user = USER_55
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        allocation = testing_cluster.allocate_nodes()
        enter(cancel_on_exit(allocation))
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        pushed_dask = deploy_dask(allocation)
        enter(cancel_on_exit(pushed_dask))

        # Nothing was pushed yet.
        synced = testing_cluster.pull_deployments()
        assert not synced.dask_deployments

        testing_cluster.push_deployment(deployment=pushed_dask)
        synced = testing_cluster.pull_deployments()
        print(synced)

        assert len(synced.dask_deployments) == 1
        pulled_dask = synced.dask_deployments[0]
        try:
            assert (pushed_dask.diagnostics.addresses
                    != pulled_dask.diagnostics.addresses)
            for address in pushed_dask.diagnostics.addresses:
                check_http_connection(url=address)
            for address in pulled_dask.diagnostics.addresses:
                check_http_connection(url=address)
        finally:
            pulled_dask.cancel_local()
示例#6
0
def test_dask_deployment_with_redeploy_failure():
    """Checks that deploying Dask raises RuntimeError when the worker
        validation is replaced with one that always fails."""
    user = USER_42
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        # Module whose `validate_worker` attribute is swapped for the test.
        dask_impl = idact.detail.dask.deploy_dask_impl
        assert dask_impl.validate_worker
        real_validate_worker = dask_impl.validate_worker

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            print("Fake worker validation.")
            raise ValueError("Fake worker validation fail.")

        try:
            dask_impl.validate_worker = fake_validate_worker

            with pytest.raises(RuntimeError):
                with deploy_dask_on_testing_cluster(allocation):
                    pass

        finally:
            # Always put the real validation back.
            dask_impl.validate_worker = real_validate_worker
def test_able_to_sync_jupyter():
    """Pushes a Jupyter deployment and checks that the pulled copy uses
        a different local port, with both ports reachable."""
    user = USER_47
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        allocation = testing_cluster.allocate_nodes()
        enter(cancel_on_exit(allocation))
        notebook_node = allocation[0]
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        free_port = get_free_local_port()
        pushed_jupyter = notebook_node.deploy_notebook(local_port=free_port)
        enter(cancel_on_exit(pushed_jupyter))

        # Nothing was pushed yet.
        synced = testing_cluster.pull_deployments()
        assert not synced.jupyter_deployments

        testing_cluster.push_deployment(deployment=pushed_jupyter)
        synced = testing_cluster.pull_deployments()
        print(synced)

        assert len(synced.jupyter_deployments) == 1
        pulled_jupyter = synced.jupyter_deployments[0]
        try:
            assert pushed_jupyter.local_port != pulled_jupyter.local_port
            check_local_http_connection(port=pushed_jupyter.local_port)
            check_local_http_connection(port=pulled_jupyter.local_port)
        finally:
            pulled_jupyter.cancel_local()
示例#8
0
def test_cancelled_dask_allocation_is_discarded_on_pull():
    """Checks that a pushed Dask deployment is not returned by a pull
        after it was cancelled."""
    user = USER_56
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))
        stack.enter_context(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        stack.enter_context(cancel_on_exit(nodes))
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        dask = deploy_dask(nodes)
        stack.enter_context(cancel_on_exit(dask))

        try:
            # Nothing was pushed yet. The Dask list is the one under test;
            # checking the Jupyter list here would pass regardless.
            deployments = cluster.pull_deployments()
            assert not deployments.dask_deployments

            cluster.push_deployment(deployment=dask)

            dask.cancel()
            # Marks the deployment as already cancelled for `finally`.
            dask = None

            # The cancelled deployment must not come back from the pull.
            deployments = cluster.pull_deployments()
            assert not deployments.dask_deployments
        finally:
            if dask is not None:
                dask.cancel()
def test_cancelled_jupyter_allocation_is_discarded_on_pull():
    """Checks that a pushed Jupyter deployment is not returned by a pull
        after it was cancelled."""
    user = USER_48
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        allocation = testing_cluster.allocate_nodes()
        enter(cancel_on_exit(allocation))
        notebook_node = allocation[0]
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        free_port = get_free_local_port()
        notebook = notebook_node.deploy_notebook(local_port=free_port)
        try:
            # Nothing was pushed yet.
            synced = testing_cluster.pull_deployments()
            assert not synced.jupyter_deployments

            testing_cluster.push_deployment(deployment=notebook)

            notebook.cancel()
            # Marks the deployment as already cancelled for `finally`.
            notebook = None

            synced = testing_cluster.pull_deployments()
            assert not synced.jupyter_deployments
        finally:
            if notebook is not None:
                notebook.cancel()
示例#10
0
def test_cancelled_node_allocation_is_discarded_on_pull():
    """Checks that a pushed node allocation is returned by a pull while
        running, and no longer returned after it was cancelled."""
    user = USER_45
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        allocation = testing_cluster.allocate_nodes()
        try:
            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()

            testing_cluster.push_deployment(deployment=allocation)

            synced = testing_cluster.pull_deployments()
            assert len(synced.nodes) == 1
            assert synced.nodes[0].running()
            allocation.cancel()
            # Marks the allocation as already cancelled for `finally`.
            allocation = None

            synced = testing_cluster.pull_deployments()
            assert not synced.nodes
        finally:
            if allocation is not None:
                allocation.cancel()
示例#11
0
def test_nodes_sync_does_not_work_when_waiting_twice():
    """Port info was already deleted by the first wait, so waiting on
        the pulled allocation for the second time defaults to port 22."""
    user = USER_44
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        allocation = testing_cluster.allocate_nodes()
        with cancel_on_exit(allocation):
            # Pushed before the first wait.
            testing_cluster.push_deployment(deployment=allocation)

            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()
            waited_node = allocation[0]
            assert waited_node.port != 22

            synced = testing_cluster.pull_deployments()
            assert len(synced.nodes) == 1
            pulled_allocation = synced.nodes[0]

            pulled_allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert pulled_allocation.running()
            pulled_node = pulled_allocation[0]

            # Same host, but the port fell back to 22.
            assert pulled_node.port == 22
            assert pulled_node.host == waited_node.host
示例#12
0
def test_remove_runtime_dir_test():
    """Runs the runtime dir removal checks on an allocated node, then
        removes the files left in the working directory."""
    user = USER_15
    removal_checks = (check_will_remove_empty,
                      check_will_ignore_non_existent,
                      check_will_remove_files,
                      check_will_not_remove_dotfiles,
                      check_will_not_remove_nested_dirs)
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(set_up_key_location(user))
        enter(disable_pytest_stdin())
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))
        target_node = allocation[0]
        try:
            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()

            # The checks run one after another, in the listed order.
            for removal_check in removal_checks:
                removal_check(node=target_node)
        finally:
            target_node.run("rm -rf *")
示例#13
0
def test_environment_missing_and_defaults():
    """Checks that loading from the default path fails while the file is
        missing, and that saving there writes the default config."""
    user = USER_25

    def default_config_path():
        """Returns the current value of IDACT_CONFIG_PATH."""
        return os.environ['IDACT_CONFIG_PATH']

    with ExitStack() as stack:
        stack.enter_context(clear_environment(user))

        EnvironmentProvider()._environment = None  # noqa, pylint: disable=protected-access,line-too-long

        assert show_clusters() == {}

        assert not os.path.isfile(default_config_path())

        # There is no file to load from yet.
        with pytest.raises(ValueError):
            load_environment()

        add_cluster(name=TEST_CLUSTER, user=user, host='localhost')

        default_config = show_cluster(TEST_CLUSTER).config
        set_log_level(logging.DEBUG)
        check_config_is_default(config=default_config, user=user)
        try:
            save_environment()
            with open(default_config_path(), 'r') as saved_file:
                saved_lines = saved_file.read().splitlines()

            pprint(saved_lines)
            assert saved_lines == get_default_config_contents(user=user)

        finally:
            os.remove(default_config_path())
示例#14
0
def test_able_to_sync_nodes_before_and_after_wait():
    """Pushes and pulls a node allocation twice: once before waiting for
        it, and once after, then compares the resulting nodes."""
    user = USER_43
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        # At most one of these is not None at any time: the handle that
        # still has to be cancelled if the test stops early.
        allocated = testing_cluster.allocate_nodes()
        pulled_before_wait = None
        pulled_after_wait = None
        try:
            synced = testing_cluster.pull_deployments()
            assert not synced.nodes

            testing_cluster.push_deployment(deployment=allocated)
            allocated = None
            synced = testing_cluster.pull_deployments()
            print(synced)

            assert len(synced.nodes) == 1
            pulled_before_wait = synced.nodes[0]
            assert len(pulled_before_wait) == 1
            pulled_before_wait.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert pulled_before_wait.running()
            waited_node = pulled_before_wait[0]

            testing_cluster.push_deployment(deployment=pulled_before_wait)
            pulled_before_wait = None
            synced = testing_cluster.pull_deployments()
            print(synced)

            assert len(synced.nodes) == 1
            pulled_after_wait = synced.nodes[0]

            assert pulled_after_wait.running()
            # Waiting again on the allocation pushed after wait fails.
            with pytest.raises(RuntimeError):
                pulled_after_wait.wait()
            assert len(pulled_after_wait) == 1
            synced_node = pulled_after_wait[0]

            assert waited_node.host == synced_node.host
            assert waited_node.port == synced_node.port
            assert synced_node.resources.cpu_cores == 1
            assert synced_node.resources.memory_total == bitmath.GiB(1)
            print(synced_node)

            assert synced_node.run('whoami') == user
        finally:
            for leftover in (allocated,
                             pulled_before_wait,
                             pulled_after_wait):
                if leftover is not None:
                    leftover.cancel()
def run_invalid_squeue_output_test(user: str, output: str):
    """Checks that no squeue entry is extracted from the given line.

        :param user: Test user to set up the environment for.

        :param output: Line passed to `extract_squeue_line`.

    """
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        now = utc_now()
        access_node = testing_cluster.get_access_node()
        extracted = extract_squeue_line(now=now,
                                        line=output,
                                        node=access_node)
        assert extracted is None
示例#16
0
def test_cannot_pull_environment_when_missing_on_cluster():
    """Checks that pulling the environment from the cluster raises
        RuntimeError after the test environment was reset."""
    user = USER_27
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        with pytest.raises(RuntimeError):
            pull_environment(cluster=testing_cluster)
示例#17
0
def test_cannot_deploy_dask_on_zero_nodes():
    """Checks that deploying Dask on an empty list of nodes raises
        ValueError."""
    user = USER_20
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        print(testing_cluster)
        with pytest.raises(ValueError):
            deploy_dask(nodes=[])
示例#18
0
def test_cannot_pull_environment_when_invalid_format_on_cluster():
    """Checks that pulling an environment file that is not valid JSON
        raises JSONDecodeError."""
    user = USER_33
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        access_node = testing_cluster.get_access_node()
        # Writes a config file on the cluster that is not valid JSON.
        access_node.run("echo abc > ~/idact-bad-config")
        with pytest.raises(JSONDecodeError):
            pull_environment(cluster=testing_cluster,
                             path="~/idact-bad-config")
示例#19
0
def test_environment_add_cluster_and_remove():
    """Adds, removes and re-adds clusters, checking the cluster list and
        the config of each cluster object along the way."""
    user = USER_26
    with ExitStack() as stack:
        stack.enter_context(clear_environment(user))

        assert show_clusters() == {}

        add_cluster(name=TEST_CLUSTER, user=user, host='localhost')

        add_cluster(name='fake cluster', user=user, host='localhost2')

        assert len(show_clusters()) == 2

        first_fake = show_cluster('fake cluster')

        assert first_fake.config.host == 'localhost2'

        remove_cluster('fake cluster')

        assert len(show_clusters()) == 1

        # Add a cluster under the same name, but with another host.
        add_cluster(name='fake cluster', user=user, host='localhost3')

        second_fake = show_cluster('fake cluster')

        # The object obtained earlier still reports the old host.
        assert first_fake.config.host == 'localhost2'
        assert second_fake.config.host == 'localhost3'

        testing_cluster = show_cluster(TEST_CLUSTER)

        assert testing_cluster.config.host == 'localhost'

        remove_cluster('fake cluster')
        remove_cluster(TEST_CLUSTER)

        assert show_clusters() == {}

        # Removing a cluster that is already gone fails.
        with pytest.raises(KeyError):
            remove_cluster('fake cluster')
示例#20
0
def test_node_tunnel_stress():
    """Allocates a node and runs the tunnel stress test on it."""
    user = USER_40
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100))
        enter(cancel_on_exit(allocation))
        run_tunnel_stress_test(stack=stack, user=user, nodes=allocation)
示例#21
0
def test_node_tunnel():
    """Allocates a node and runs the tunnel test on it."""
    user = USER_5
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        # NOTE(review): the allocation is not registered for cancellation
        # here; presumably run_tunnel_test takes care of it - confirm.
        run_tunnel_test(user=user, nodes=allocation)
示例#22
0
def test_node_tunnel_fall_back_when_local_port_free_but_fails():
    """Checks that a tunnel will fall back to a random port if local port
        is initially free, but tunnel cannot be created anyway (e.g. another
        process binds to it at the last moment)."""
    user = USER_54
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        there = get_free_remote_port(node=node)
        here = get_free_local_port()

        real_build_tunnel = idact.detail.nodes.node_impl.build_tunnel

        # Number of times the patched tunnel builder was called.
        attempts = 0

        def fake_build_tunnel(*args, **kwargs) -> TunnelInternal:
            """Fails on the first call, delegates to the real builder on
                the second, and rejects any further call."""
            nonlocal attempts
            attempts += 1
            if attempts == 1:
                raise RuntimeError("Fake failure.")
            # Exactly one retry is expected.
            assert attempts == 2

            return real_build_tunnel(*args, **kwargs)

        try:
            idact.detail.nodes.node_impl.build_tunnel = fake_build_tunnel
            tunnel = node.tunnel(there=there, here=here)
            stack.enter_context(close_tunnel_on_exit(tunnel))
            assert attempts == 2
            # The retry ended up on a port other than the requested one.
            assert tunnel.here != here
        finally:
            # Always restore the real tunnel builder.
            idact.detail.nodes.node_impl.build_tunnel = real_build_tunnel
示例#23
0
def test_jupyter_deployment():
    """Allocates a node and enters, then leaves, the Jupyter deployment
        context on it."""
    user = USER_6
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        # Entering and leaving the context without errors is the test.
        with deploy_jupyter(allocation):
            pass
示例#24
0
def test_node_tunnel_public_key():
    """Allocates a node and runs the tunnel test, with the environment
        set up for public key authentication."""
    user = USER_13
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user=user, auth=AuthMethod.PUBLIC_KEY))

        testing_cluster = show_cluster(name=TEST_CLUSTER)

        # The password is only set for the duration of the allocation.
        with set_password(get_test_user_password(user)):
            allocation = testing_cluster.allocate_nodes(
                nodes=1,
                cores=1,
                memory_per_node=MiB(100),
                walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=allocation)
示例#25
0
def test_able_to_reach_nodes_when_using_password_based_authentication():
    """It should be possible to connect to compute nodes even when using
        password-based authentication, because local public key is authorized
        for the compute nodes after initial connection.
        However, direct connection from access node should fail.
        Password is still used between the client and the access node."""
    user = USER_10
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(set_up_key_location(user))
        enter(reset_environment(user=user, auth=AuthMethod.ASK))
        enter(set_password(get_test_user_password(user)))
        enter(disable_pytest_stdin())
        testing_cluster = show_cluster(TEST_CLUSTER)
        access_node = testing_cluster.get_access_node()

        allocation = testing_cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))
        print(allocation)

        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        first_compute_node = allocation[0]
        assert isinstance(first_compute_node, NodeInternal)

        key_value = get_public_key_value()

        # Local key was installed for the deployed sshd, allowing access
        # between the access node and compute nodes.
        assert allocation[0].run('whoami') == user

        # Local key was not installed for the access node
        with pytest.raises(RuntimeError):
            access_node.run(
                "grep '{public_key_value}' ~/.ssh/authorized_keys".format(
                    public_key_value=key_value))

        # But it was installed for compute nodes.
        access_node.run("grep '{public_key_value}'"
                        " ~/.ssh/authorized_keys.idact".format(
                            public_key_value=key_value))

        check_direct_access_from_access_node_does_not_work(allocation[0])
示例#26
0
def test_dask_deployment_with_redeploy_on_validation_failure():
    """Checks that Dask deployment still succeeds when only the first
        worker validation fails, after three validation calls in total."""
    user = USER_41
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        # Module whose `validate_worker` attribute is swapped for the test.
        dask_impl = idact.detail.dask.deploy_dask_impl
        assert dask_impl.validate_worker
        real_validate_worker = dask_impl.validate_worker

        # Number of times the fake validation was called so far.
        validation_calls = 0

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            nonlocal validation_calls
            is_first_call = validation_calls == 0
            validation_calls += 1

            print("Fake worker validation.")
            if is_first_call:
                raise RuntimeError("Fake worker validation: First node fail.")
            print("Deciding the worker is valid.")

        try:
            dask_impl.validate_worker = fake_validate_worker

            with deploy_dask_on_testing_cluster(allocation):
                pass

            assert validation_calls == 3

        finally:
            # Always put the real validation back.
            dask_impl.validate_worker = real_validate_worker
示例#27
0
def test_migrate_deployments():
    """Migrating from an old version of the deployments file should work
        without fatal errors."""
    user = USER_57
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        head_node = testing_cluster.get_access_node()
        assert isinstance(head_node, NodeInternal)

        def check_deployments_file_exists():
            """Reads the deployments file on the access node."""
            head_node.run("cat ~/.idact/.deployments")

        allocation = testing_cluster.allocate_nodes()
        enter(cancel_on_exit(allocation))
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        # Reading the deployments file fails at this point.
        with pytest.raises(RuntimeError):
            check_deployments_file_exists()

        # Put a file there that contains nothing but the type field.
        deployments_path = head_node.run("echo ~/.idact/.deployments")
        old_contents = '{{"type": "{type}"}}'.format(
            type=SerializableTypes.DEPLOYMENT_DEFINITIONS)
        put_file_on_node(node=head_node,
                         remote_path=deployments_path,
                         contents=old_contents)

        synced = testing_cluster.pull_deployments()
        assert not synced.nodes

        testing_cluster.push_deployment(deployment=allocation)

        synced = testing_cluster.pull_deployments()
        assert len(synced.nodes) == 1

        testing_cluster.clear_pushed_deployments()

        # After clearing, reading the deployments file fails again.
        with pytest.raises(RuntimeError):
            check_deployments_file_exists()
示例#28
0
def test_dask_deployment_with_setup_actions():
    """Checks that a file produced by the Dask setup actions can be read
        on the node yielded by the deployment context."""
    user = USER_18
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        allocation = testing_cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        # Two actions: create a file, then rename it.
        testing_cluster.config.setup_actions.dask = [
            'echo ABC > file.txt',
            'mv file.txt file2.txt']
        with deploy_dask_on_testing_cluster(allocation) as deployed_node:
            assert deployed_node.run("cat file2.txt") == "ABC"
示例#29
0
def test_dask_deployment_with_absolute_scratch_path():
    """Deploys Dask with the scratch directory set to an absolute path."""
    user = USER_24
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        # Set before allocating the node.
        testing_cluster.config.scratch = '/home/user-24'

        allocation = testing_cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=10))
        enter(cancel_on_exit(allocation))

        # Entering and leaving the context without errors is the test.
        with deploy_dask_on_testing_cluster(allocation):
            pass
示例#30
0
def test_basic():
    """Allocates two nodes, runs a command on each, and checks that the
        allocation is unusable after it was cancelled."""
    user = USER_1
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        all_clusters = show_clusters()
        print(all_clusters)

        assert len(all_clusters) == 1

        testing_cluster = show_cluster(name=TEST_CLUSTER)
        print(testing_cluster)

        assert all_clusters[TEST_CLUSTER] == testing_cluster

        allocation = testing_cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30),
            native_args={'--partition': 'debug'})
        with cancel_on_exit(allocation):
            assert len(allocation) == 2
            assert allocation[0] in allocation
            print(allocation)
            assert str(allocation) == repr(allocation)

            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()

            print(allocation)
            print(allocation[0])

            assert allocation[0].run('whoami') == user
            assert allocation[1].run('whoami') == user

        # Leaving the block above cancelled the allocation.
        assert not allocation.running()
        with pytest.raises(RuntimeError):
            allocation.wait()
        with pytest.raises(RuntimeError):
            allocation[0].run('whoami')