Пример #1
0
def test_cancelled_dask_allocation_is_discarded_on_pull():
    """A Dask deployment that was pushed and then cancelled must not be
    returned by a subsequent pull."""
    user = USER_56
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))
        stack.enter_context(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        stack.enter_context(cancel_on_exit(nodes))
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        dask = deploy_dask(nodes)
        stack.enter_context(cancel_on_exit(dask))

        try:
            # Nothing has been pushed yet, so no Dask deployment may show up.
            deployments = cluster.pull_deployments()
            assert not deployments.dask_deployments

            cluster.push_deployment(deployment=dask)

            dask.cancel()
            # Cleared so that the `finally` clause does not cancel it again.
            dask = None

            # The Dask list is the one under test: no Jupyter deployment is
            # ever created here, so checking the Jupyter list proves nothing.
            deployments = cluster.pull_deployments()
            assert not deployments.dask_deployments
        finally:
            if dask is not None:
                dask.cancel()
def test_able_to_sync_jupyter():
    """Pushes a Jupyter deployment, pulls it back and checks that both the
    original and the pulled deployment answer HTTP on distinct local ports."""
    user = USER_47
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        enter(cancel_on_exit(nodes))
        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        free_port = get_free_local_port()
        jupyter = node.deploy_notebook(local_port=free_port)
        enter(cancel_on_exit(jupyter))

        # Before the push, nothing is synchronized.
        assert not cluster.pull_deployments().jupyter_deployments

        cluster.push_deployment(deployment=jupyter)
        pulled = cluster.pull_deployments()
        print(pulled)

        assert len(pulled.jupyter_deployments) == 1
        jupyter_2 = pulled.jupyter_deployments[0]
        try:
            assert jupyter.local_port != jupyter_2.local_port
            for deployment in (jupyter, jupyter_2):
                check_local_http_connection(port=deployment.local_port)
        finally:
            # Only the local side of the pulled copy is released here.
            jupyter_2.cancel_local()
Пример #3
0
def test_generate_key_when_location_is_taken():
    """Key generation must fall back to other file names when some paths in
    the key location are already taken."""
    user = LOCAL_USER_7
    # Fixed seed: the expected file names below depend on it.
    random.seed(571303)
    with set_up_key_location(user):
        def get_expected_path(file_name: str) -> str:
            return os.path.join(os.environ['IDACT_KEY_LOCATION'], file_name)

        # Occupy several paths up front with empty files.
        for taken_name in ('id_rsa_in',
                           'id_rsa_sy.pub',
                           'id_rsa_ov',
                           'id_rsa_ov.pub',
                           'id_rsa_d9'):
            with open(get_expected_path(taken_name), 'w'):
                pass

        expected_keys = (('host0', 'id_rsa_wg9o'),
                         ('host1', 'id_rsa_t1'))

        for host, key_name in expected_keys:
            generated = generate_key(host=host, key_type=KeyType.RSA)
            assert generated == get_expected_path(key_name)

        for host, key_name in expected_keys:
            check_key_pair(host=host, path=get_expected_path(key_name))
Пример #4
0
def test_nodes_sync_does_not_work_when_waiting_twice():
    """Port info was already deleted by the first wait, so waiting on the
    pulled allocation defaults to port 22."""
    user = USER_44
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        # Registered last, so the allocation is cancelled first on unwind.
        enter(cancel_on_exit(nodes))
        cluster.push_deployment(deployment=nodes)

        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert nodes.running()
        original_node = nodes[0]
        assert original_node.port != 22

        pulled = cluster.pull_deployments()
        assert len(pulled.nodes) == 1
        nodes_2 = pulled.nodes[0]

        nodes_2.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert nodes_2.running()
        pulled_node = nodes_2[0]

        assert pulled_node.port == 22
        assert pulled_node.host == original_node.host
Пример #5
0
def test_get_free_private_key_location_when_location_is_taken():
    """With some paths taken, the returned locations must be fallbacks."""
    user = LOCAL_USER_4
    # Fixed seed: the expected file names below depend on it.
    random.seed(571303)
    assert KEY_NAME_SUFFIX_RETRIES == 4
    with set_up_key_location(user):

        def get_expected_path(file_name: str) -> str:
            return os.path.join(os.environ['IDACT_KEY_LOCATION'], file_name)

        # Occupy several paths up front with empty files.
        for taken_name in ('id_rsa_in', 'id_rsa_sy.pub', 'id_rsa_ov',
                           'id_rsa_ov.pub', 'id_rsa_d9'):
            with open(get_expected_path(taken_name), 'w'):
                pass

        for expected_name in ('id_rsa_wg9o', 'id_rsa_t1', 'id_rsa_4j'):
            free_path = get_free_private_key_location(key_type=KeyType.RSA)
            assert free_path == get_expected_path(expected_name)
Пример #6
0
def test_get_free_private_key_location_when_all_locations_are_taken():
    """Every generated path, fallbacks included, is already taken, so the
    lookup must raise RuntimeError."""
    user = LOCAL_USER_5
    # Fixed seed: the file names created below depend on it.
    random.seed(571303)

    with set_up_key_location(user):

        def get_expected_path(file_name: str) -> str:
            return os.path.join(os.environ['IDACT_KEY_LOCATION'], file_name)

        taken_names = (
            'id_rsa_in', 'id_rsa_sy', 'id_rsa_ov', 'id_rsa_d9',
            'id_rsa_wg9o', 'id_rsa_t14j', 'id_rsa_dwud', 'id_rsa_oa2v',
            'id_rsa_9tybnbx2', 'id_rsa_aspfb1np', 'id_rsa_vy5345nr',
            'id_rsa_c7gl4y4p',
            'id_rsa_3w95f8974vbce8um', 'id_rsa_a56h9p6ez6e5p3pk',
            'id_rsa_vx3bq5hwplvbwzgo', 'id_rsa_1ypfko2yz2vypimt',
            'id_rsa_h8sqyu1avr7f5hwo3geil5nt8rkb9rx4',
            'id_rsa_4xq0hw5qvhb9edcp9o2g5id1wmjbq1ro',
            'id_rsa_f7c2epry1u1qzpac4hb7gymb3r3s3iex',
            'id_rsa_im4sg2s7euj8j4h2no03qvhel4lcg9l1',
        )

        # Create an empty file at each of those paths.
        for taken_name in taken_names:
            with open(get_expected_path(taken_name), 'w'):
                pass

        with pytest.raises(RuntimeError):
            # The print only matters if the call unexpectedly succeeds.
            print(get_free_private_key_location(key_type=KeyType.RSA))
Пример #7
0
def test_cancelled_node_allocation_is_discarded_on_pull():
    """A pushed node allocation is no longer returned by a pull once it has
    been cancelled."""
    user = USER_45
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        # Tracks whether the explicit cancel below already succeeded.
        cancelled = False
        try:
            nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes.running()

            cluster.push_deployment(deployment=nodes)

            pulled = cluster.pull_deployments()
            assert len(pulled.nodes) == 1
            assert pulled.nodes[0].running()
            nodes.cancel()
            cancelled = True

            assert not cluster.pull_deployments().nodes
        finally:
            if not cancelled:
                nodes.cancel()
Пример #8
0
def test_dask_deployment_with_redeploy_failure():
    """With worker validation patched to always fail, deploying Dask is
    expected to raise RuntimeError."""
    user = USER_42
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))

        deploy_dask_impl = idact.detail.dask.deploy_dask_impl
        assert deploy_dask_impl.validate_worker
        original_validate_worker = deploy_dask_impl.validate_worker

        def fake_validate_worker(worker: DaskWorkerDeployment):
            print("Fake worker validation.")
            raise ValueError("Fake worker validation fail.")

        try:
            # Swap in the failing validator for the duration of the deploy.
            deploy_dask_impl.validate_worker = fake_validate_worker

            with pytest.raises(RuntimeError), \
                    deploy_dask_on_testing_cluster(nodes):
                pass
        finally:
            # Always restore the real validator for other tests.
            deploy_dask_impl.validate_worker = original_validate_worker
Пример #9
0
def test_remove_runtime_dir_test():
    """Runs the `check_will_*` helpers against an allocated node, then runs
    `rm -rf *` on that node regardless of the outcome."""
    user = USER_15
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(set_up_key_location(user))
        enter(disable_pytest_stdin())
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))
        node = nodes[0]
        try:
            nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes.running()

            # Checks run in this fixed order.
            for check in (check_will_remove_empty,
                          check_will_ignore_non_existent,
                          check_will_remove_files,
                          check_will_not_remove_dotfiles,
                          check_will_not_remove_nested_dirs):
                check(node=node)
        finally:
            node.run("rm -rf *")
Пример #10
0
def test_generate_and_install_key_on_access_node():
    """`add_cluster` must raise ValueError when key installation is requested
    without a key; with `key=KeyType.RSA` it must succeed, after which the
    remote key and node access check is run."""
    user = USER_8
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(clear_environment(user))
        enter(set_up_key_location(user))
        enter(disable_pytest_stdin())

        def add_test_cluster(**extra):
            """Calls add_cluster with the arguments shared by both calls."""
            return add_cluster(name=TEST_CLUSTER,
                               user=user,
                               host=get_testing_host(),
                               port=get_testing_port(),
                               auth=AuthMethod.PUBLIC_KEY,
                               install_key=True,
                               **extra)

        # No key provided.
        with pytest.raises(ValueError):
            add_test_cluster(key=None)

        # Generate RSA key.
        add_test_cluster(key=KeyType.RSA,
                         retries={Retry.PORT_INFO: set_retry(count=0)})

        check_remote_key_and_node_access(stack=stack, user=user)
Пример #11
0
def test_node_tunnel_fall_back_when_local_port_taken():
    """A second tunnel requesting an already used local port must end up on
    a different local port."""
    user = USER_53
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))

        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        remote_port = get_free_remote_port(node=node)
        local_port = get_free_local_port()

        # The first tunnel gets the requested local port.
        first_tunnel = node.tunnel(there=remote_port, here=local_port)
        enter(close_tunnel_on_exit(first_tunnel))
        assert first_tunnel.here == local_port

        # The same request again must land on another local port.
        second_tunnel = node.tunnel(there=remote_port, here=local_port)
        enter(close_tunnel_on_exit(second_tunnel))
        assert second_tunnel.here != local_port
Пример #12
0
def test_able_to_sync_dask():
    """Pushes a Dask deployment, pulls it back and checks that the
    diagnostics addresses of both copies differ and answer HTTP."""
    user = USER_55
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        enter(cancel_on_exit(nodes))
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        dask = deploy_dask(nodes)
        enter(cancel_on_exit(dask))

        # Before the push, nothing is synchronized.
        assert not cluster.pull_deployments().dask_deployments

        cluster.push_deployment(deployment=dask)
        pulled = cluster.pull_deployments()
        print(pulled)

        assert len(pulled.dask_deployments) == 1
        dask_2 = pulled.dask_deployments[0]
        try:
            assert dask.diagnostics.addresses != dask_2.diagnostics.addresses
            for deployment in (dask, dask_2):
                for url in deployment.diagnostics.addresses:
                    check_http_connection(url=url)
        finally:
            # Only the local side of the pulled copy is released here.
            dask_2.cancel_local()
Пример #13
0
def test_cancelled_jupyter_allocation_is_discarded_on_pull():
    """A Jupyter deployment that was pushed and then cancelled must not be
    returned by a subsequent pull."""
    user = USER_48
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        nodes = cluster.allocate_nodes()
        enter(cancel_on_exit(nodes))
        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        free_port = get_free_local_port()
        jupyter = node.deploy_notebook(local_port=free_port)
        # Tracks whether the explicit cancel below already succeeded.
        cancelled = False
        try:
            assert not cluster.pull_deployments().jupyter_deployments

            cluster.push_deployment(deployment=jupyter)

            jupyter.cancel()
            cancelled = True

            assert not cluster.pull_deployments().jupyter_deployments
        finally:
            if not cancelled:
                jupyter.cancel()
Пример #14
0
def test_able_to_sync_nodes_before_and_after_wait():
    """Pushes and pulls a node allocation both before and after waiting for
    it, then checks that the final pulled node matches the waited one."""
    user = USER_43
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)

        # Each handle is reset to None once it has been pushed, so the
        # cleanup below only cancels the handles that are still held.
        nodes = cluster.allocate_nodes()
        nodes_2 = None
        nodes_3 = None
        try:
            assert not cluster.pull_deployments().nodes

            # First round trip: push before waiting.
            cluster.push_deployment(deployment=nodes)
            nodes = None
            pulled = cluster.pull_deployments()
            print(pulled)

            assert len(pulled.nodes) == 1
            nodes_2 = pulled.nodes[0]
            assert len(nodes_2) == 1
            nodes_2.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes_2.running()
            node_2 = nodes_2[0]

            # Second round trip: push after waiting.
            cluster.push_deployment(deployment=nodes_2)
            nodes_2 = None
            pulled = cluster.pull_deployments()
            print(pulled)

            assert len(pulled.nodes) == 1
            nodes_3 = pulled.nodes[0]

            assert nodes_3.running()
            # Waiting again on the pulled allocation is expected to fail.
            with pytest.raises(RuntimeError):
                nodes_3.wait()
            assert len(nodes_3) == 1
            node_3 = nodes_3[0]

            assert node_2.host == node_3.host
            assert node_2.port == node_3.port
            assert node_3.resources.cpu_cores == 1
            assert node_3.resources.memory_total == bitmath.GiB(1)
            print(node_3)

            assert node_3.run('whoami') == user
        finally:
            for allocation in (nodes, nodes_2, nodes_3):
                if allocation is not None:
                    allocation.cancel()
Пример #15
0
def test_cannot_deploy_dask_on_zero_nodes():
    """Deploying Dask on an empty node list must raise ValueError."""
    user = USER_20
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)
        with pytest.raises(ValueError):
            deploy_dask(nodes=[])
Пример #16
0
def test_node_tunnel_stress():
    """Allocates one node and hands it to `run_tunnel_stress_test`."""
    user = USER_40
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        allocation = cluster.allocate_nodes(nodes=1,
                                            cores=1,
                                            memory_per_node=MiB(100))
        enter(cancel_on_exit(allocation))
        run_tunnel_stress_test(stack=stack, user=user, nodes=allocation)
Пример #17
0
def test_node_tunnel():
    """Allocates a single node and runs the tunnel test on it."""
    user = USER_5
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        allocation = cluster.allocate_nodes(nodes=1,
                                            cores=1,
                                            memory_per_node=MiB(100),
                                            walltime=Walltime(minutes=30))
        # The allocation is not registered for cancellation here; it is
        # passed straight to the shared helper.
        run_tunnel_test(user=user, nodes=allocation)
Пример #18
0
def test_node_tunnel_fall_back_when_local_port_free_but_fails():
    """Checks that a tunnel will fall back to a random port if the local
    port is initially free, but the tunnel cannot be created anyway (e.g.
    another process binds to it at the last moment)."""
    user = USER_54
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))

        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        remote_port = get_free_remote_port(node=node)
        local_port = get_free_local_port()

        node_impl = idact.detail.nodes.node_impl
        real_build_tunnel = node_impl.build_tunnel
        # The socket is only created and closed; it is never bound.
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        attempts = 0

        def fake_build_tunnel(*args, **kwargs) -> TunnelInternal:
            """Fails on the first call, delegates on the second."""
            nonlocal attempts
            attempts += 1
            if attempts == 1:
                raise RuntimeError("Fake failure.")
            # Any call past the second one is unexpected.
            assert attempts == 2

            return real_build_tunnel(*args, **kwargs)

        try:
            node_impl.build_tunnel = fake_build_tunnel
            tunnel = node.tunnel(there=remote_port, here=local_port)
            enter(close_tunnel_on_exit(tunnel))
            assert attempts == 2
            assert tunnel.here != local_port
        finally:
            # Restore the real implementation before releasing the socket.
            node_impl.build_tunnel = real_build_tunnel
            sock.close()
Пример #19
0
def test_jupyter_deployment():
    """Allocates one node and enters/exits the `deploy_jupyter` context."""
    user = USER_6
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        allocation = cluster.allocate_nodes(nodes=1,
                                            cores=1,
                                            memory_per_node=MiB(100),
                                            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        # Entering and leaving the context is the whole test.
        with deploy_jupyter(allocation):
            pass
Пример #20
0
def test_generate_and_install_key_no_sshd():
    """Adds a cluster with a generated RSA key and `disable_sshd=True`, then
    runs the remote key and node access check."""
    user = USER_14
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(clear_environment(user))
        enter(set_up_key_location(user))
        enter(disable_pytest_stdin())

        port_info_retry = set_retry(count=0)
        add_cluster(name=TEST_CLUSTER,
                    user=user,
                    host=get_testing_host(),
                    port=get_testing_port(),
                    auth=AuthMethod.PUBLIC_KEY,
                    key=KeyType.RSA,
                    install_key=True,
                    disable_sshd=True,
                    retries={Retry.PORT_INFO: port_info_retry})

        check_remote_key_and_node_access(stack=stack, user=user)
Пример #21
0
def test_node_tunnel_public_key():
    """Allocates a node and runs the tunnel test with the environment set up
    for public key authentication."""
    user = USER_13
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user=user, auth=AuthMethod.PUBLIC_KEY))

        cluster = show_cluster(name=TEST_CLUSTER)

        # The password is only made available while allocating.
        password = get_test_user_password(user)
        with set_password(password):
            allocation = cluster.allocate_nodes(
                nodes=1,
                cores=1,
                memory_per_node=MiB(100),
                walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=allocation)
Пример #22
0
def test_able_to_reach_nodes_when_using_password_based_authentication():
    """Compute nodes should be reachable even with password-based
    authentication, because the local public key is authorized for the
    compute nodes after the initial connection.
    Direct connection from the access node should fail, however.
    The password is still used between the client and the access node."""
    user = USER_10
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(set_up_key_location(user))
        enter(reset_environment(user=user, auth=AuthMethod.ASK))
        enter(set_password(get_test_user_password(user)))
        enter(disable_pytest_stdin())
        cluster = show_cluster(TEST_CLUSTER)
        access_node = cluster.get_access_node()

        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))
        print(nodes)

        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        compute_node = nodes[0]
        assert isinstance(compute_node, NodeInternal)

        public_key_value = get_public_key_value()

        # Local key was installed for the deployed sshd, allowing access
        # between the access node and compute nodes.
        assert nodes[0].run('whoami') == user

        # Local key was not installed for the access node, so the grep
        # command is expected to fail.
        grep_access_node_keys = (
            "grep '{public_key_value}' ~/.ssh/authorized_keys".format(
                public_key_value=public_key_value))
        with pytest.raises(RuntimeError):
            access_node.run(grep_access_node_keys)

        # But it was installed for compute nodes.
        grep_compute_node_keys = (
            "grep '{public_key_value}'"
            " ~/.ssh/authorized_keys.idact".format(
                public_key_value=public_key_value))
        access_node.run(grep_compute_node_keys)

        check_direct_access_from_access_node_does_not_work(nodes[0])
Пример #23
0
def test_basic():
    """Shows the cluster, allocates two nodes, runs a command on each, and
    checks the allocation is unusable after cancellation."""
    user = USER_1
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        clusters = show_clusters()
        print(clusters)

        assert len(clusters) == 1

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)

        assert clusters[TEST_CLUSTER] == cluster

        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30),
                                       native_args={'--partition': 'debug'})
        # Kept as an inner block: the assertions after it rely on the
        # allocation having been cancelled when the block exits.
        with cancel_on_exit(nodes):
            assert len(nodes) == 2
            assert nodes[0] in nodes
            print(nodes)
            assert str(nodes) == repr(nodes)

            nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes.running()

            print(nodes)
            print(nodes[0])

            for index in (0, 1):
                assert nodes[index].run('whoami') == user

        assert not nodes.running()
        with pytest.raises(RuntimeError):
            nodes.wait()
        with pytest.raises(RuntimeError):
            nodes[0].run('whoami')
Пример #24
0
def test_dask_deployment_with_redeploy_on_validation_failure():
    """With worker validation patched to fail only on its first call, the
    Dask deployment must still succeed, calling validation three times."""
    user = USER_41
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        enter(cancel_on_exit(nodes))

        deploy_dask_impl = idact.detail.dask.deploy_dask_impl
        assert deploy_dask_impl.validate_worker
        original_validate_worker = deploy_dask_impl.validate_worker

        validation_calls = 0

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            nonlocal validation_calls
            is_first_call = validation_calls == 0
            validation_calls += 1

            print("Fake worker validation.")
            if is_first_call:
                raise RuntimeError("Fake worker validation: First node fail.")
            print("Deciding the worker is valid.")

        try:
            # Swap in the fake validator for the duration of the deploy.
            deploy_dask_impl.validate_worker = fake_validate_worker

            with deploy_dask_on_testing_cluster(nodes):
                pass

            assert validation_calls == 3

        finally:
            # Always restore the real validator for other tests.
            deploy_dask_impl.validate_worker = original_validate_worker
Пример #25
0
def test_dask_deployment_with_absolute_scratch_path():
    """Deploys Dask after setting the cluster scratch to an absolute path."""
    user = USER_24
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        cluster.config.scratch = '/home/user-24'

        allocation = cluster.allocate_nodes(nodes=1,
                                            cores=1,
                                            memory_per_node=MiB(100),
                                            walltime=Walltime(minutes=10))
        enter(cancel_on_exit(allocation))

        # Entering and leaving the context is the whole test.
        with deploy_dask_on_testing_cluster(allocation):
            pass
Пример #26
0
def test_dask_deployment_with_setup_actions():
    """Configures Dask setup actions that write and rename a file, then
    checks the renamed file's contents from within the deployment."""
    user = USER_18
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        allocation = cluster.allocate_nodes(nodes=2,
                                            cores=1,
                                            memory_per_node=MiB(100),
                                            walltime=Walltime(minutes=30))
        enter(cancel_on_exit(allocation))

        setup_commands = ['echo ABC > file.txt',
                          'mv file.txt file2.txt']
        cluster.config.setup_actions.dask = setup_commands
        with deploy_dask_on_testing_cluster(allocation) as node:
            file_contents = node.run("cat file2.txt")
            assert file_contents == "ABC"
Пример #27
0
def test_migrate_deployments():
    """Migrating from an old version of the deployments file should work
    without fatal errors."""
    user = USER_57
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))
        enter(clear_deployment_sync_data(user))

        cluster = show_cluster(name=TEST_CLUSTER)
        access_node = cluster.get_access_node()
        assert isinstance(access_node, NodeInternal)

        def assert_deployments_file_missing():
            """Expects `cat` of the deployments file to fail."""
            with pytest.raises(RuntimeError):
                access_node.run("cat ~/.idact/.deployments")

        nodes = cluster.allocate_nodes()
        enter(cancel_on_exit(nodes))
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        assert_deployments_file_missing()

        # Write a deployments file that carries only a type field.
        remote_path = access_node.run("echo ~/.idact/.deployments")
        old_contents = '{{"type": "{type}"}}'.format(
            type=SerializableTypes.DEPLOYMENT_DEFINITIONS)
        put_file_on_node(node=access_node,
                         remote_path=remote_path,
                         contents=old_contents)

        assert not cluster.pull_deployments().nodes

        cluster.push_deployment(deployment=nodes)

        assert len(cluster.pull_deployments().nodes) == 1

        cluster.clear_pushed_deployments()

        assert_deployments_file_missing()
Пример #28
0
def test_get_free_private_key_location_when_location_is_free():
    """With no paths taken, consecutive lookups must return the expected
    sequence of file names for the fixed random seed."""
    user = LOCAL_USER_3
    # Fixed seed: the expected file names below depend on it.
    random.seed(571303)
    expected_file_names = [
        'id_rsa_in', 'id_rsa_sy', 'id_rsa_ov', 'id_rsa_d9', 'id_rsa_wg',
        'id_rsa_9o', 'id_rsa_t1', 'id_rsa_4j'
    ]
    with set_up_key_location(user):
        result_paths = [
            get_free_private_key_location(key_type=KeyType.RSA)
            for _ in expected_file_names
        ]
        # Capture the key location while the context is still active, so the
        # test does not depend on the variable surviving the context's exit.
        # NOTE(review): set_up_key_location is not visible here - confirm
        # whether it restores the environment on exit.
        key_location = os.environ['IDACT_KEY_LOCATION']
    expected_paths = [
        os.path.join(key_location, file_name)
        for file_name in expected_file_names
    ]
    assert result_paths == expected_paths
Пример #29
0
def test_can_read_node_resources():
    """Checks resource information reported for the access node and for an
    allocated node, including a believability check under CPU stress."""
    user = USER_39
    with ExitStack() as stack:
        enter = stack.enter_context
        enter(disable_pytest_stdin())
        enter(set_up_key_location(user))
        enter(reset_environment(user))
        enter(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)

        access_node = cluster.get_access_node()
        print(str(access_node))
        assert str(access_node) == repr(access_node)

        def check_access_node_under_cpu_stress():
            """Runs the believable-range check while CPU stress is on."""
            start_stress_cpu(user=user, timeout=10)
            try:
                check_resources_in_believable_range(access_node.resources)
            finally:
                stop_stress_cpu(user=user)

        assert access_node.resources.cpu_cores is None
        assert access_node.resources.memory_total is None
        check_access_node_under_cpu_stress()

        requested_memory = bitmath.GiB(0.8)
        nodes = cluster.allocate_nodes(cores=1,
                                       memory_per_node=requested_memory)

        assert len(nodes) == 1
        node = nodes[0]

        enter(cancel_on_exit(nodes))

        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert nodes.running()

        assert node.resources.cpu_cores == 1
        assert node.resources.memory_total == bitmath.GiB(0.8)
        # NOTE(review): this second check inspects the access node again,
        # not the allocated node - confirm that this is intended.
        check_access_node_under_cpu_stress()

        assert node.run('whoami') == user
Пример #30
0
def run_notebook_app(user: str,
                     environment_file: str,
                     args: List[str],
                     notebook_defaults: dict = None) -> Result:
    """Runs the notebook app through CliRunner and yields the result.

    The environment is reset for public key authentication, optionally
    updated with `notebook_defaults`, and saved to `environment_file` before
    the app is invoked. The environment file is removed afterwards.

    NOTE(review): the body contains `yield`, so this is a generator; it is
    presumably wrapped by a context manager decorator that is not visible
    in this excerpt - confirm against the full file.

    :param user: Test user name.

    :param environment_file: Path the environment is saved to; the file is
                             removed when the generator finishes.

    :param args: Command line arguments passed to the app.

    :param notebook_defaults: Optional value assigned to
                              `cluster.config.notebook_defaults`.

    """
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(
            reset_environment(user=user, auth=AuthMethod.PUBLIC_KEY))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        # Connect while the password context is active.
        cluster.get_access_node().connect()
        set_log_level(logging.INFO)
        if notebook_defaults:
            config = cluster.config
            assert isinstance(config, ClusterConfigImpl)
            config.notebook_defaults = notebook_defaults
        save_environment(path=environment_file)

        try:
            runner = CliRunner()
            # Gets one entry per call to the patched open_in_browser.
            opened_in_browser = []

            def fake_open_in_browser(_):
                opened_in_browser.append(True)

            # Patch the class attribute so no real browser is opened.
            saved_open_in_browser = JupyterDeploymentImpl.open_in_browser
            JupyterDeploymentImpl.open_in_browser = fake_open_in_browser
            try:
                # The variable is set only for the duration of the run.
                os.environ[NOTEBOOK_TEST_RUN_ENVIRONMENT_VARIABLE] = ''
                result = runner.invoke(main, args=args)
            finally:
                JupyterDeploymentImpl.open_in_browser = saved_open_in_browser
                del os.environ[NOTEBOOK_TEST_RUN_ENVIRONMENT_VARIABLE]
            print("\n\n\nClick output of the notebook app run:")
            print(result.output)
            yield result
        finally:
            # Runs once the consumer is done with the yielded result.
            os.remove(environment_file)