def deploy_jupyter(nodes: Nodes):
    """Deploys a Jupyter notebook on the first node and yields that node.

    Waits for the allocation, deploys a notebook bound to a free local
    port, and asserts that exactly one line is returned by the Jupyter
    process listing and that `check_local_http_connection` passes
    for the local port. The node is yielded while the `cancel_on_exit`
    block is still open. After that block is left, the process listing
    is retried until `check_no_output` passes.

    NOTE(review): this is a generator function; presumably it is wrapped
    as a context manager by a decorator outside this view - confirm.

    :param nodes: Allocated nodes. The notebook is deployed on the first.

    """
    jupyter_ps_command = "ps -u $USER | grep jupyter ; exit 0"

    node = nodes[0]
    nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
    assert nodes.running()

    def assert_no_jupyter_process():
        return check_no_output(node=node, command=jupyter_ps_command)

    port = get_free_local_port()
    notebook = node.deploy_notebook(local_port=port)
    with cancel_on_exit(notebook):
        print(notebook)
        assert str(notebook) == repr(notebook)

        assert notebook.local_port == port

        listed_processes = node.run(jupyter_ps_command).splitlines()
        pprint(listed_processes)
        assert len(listed_processes) == 1

        check_local_http_connection(port=port)

        yield node

    # The listing may still show the process for a moment after leaving
    # the block above, hence the retries.
    retry(assert_no_jupyter_process,
          retries=5 * get_testing_process_count(),
          seconds_between_retries=1)
def test_third_try_succeeds_one_retry():
    """Expects `RuntimeError` when the task is set to fail twice,
        but only one retry is allowed. Both failures should be recorded."""
    recorded = []

    def attempt():
        return failing_task(fail_times=2, failures=recorded)

    with pytest.raises(RuntimeError):
        retry(fun=attempt,
              retries=1,
              seconds_between_retries=0)

    assert recorded == [0, 1]
def test_no_retries():
    """Expects `RuntimeError` when the task is set to fail once,
        and no retries are allowed. The single failure should be recorded."""
    recorded = []

    def attempt():
        return failing_task(fail_times=1, failures=recorded)

    with pytest.raises(RuntimeError):
        retry(fun=attempt,
              retries=0,
              seconds_between_retries=0)

    assert recorded == [0]
def deploy_dask_on_testing_cluster(nodes: Nodes):
    """Deploys Dask on the nodes, checks the deployment, and yields
        the first node.

    Before deploying, asserts that the worker process listing on the first
    node is empty. Inside the `cancel_on_exit` block, it asserts that:

    - exactly one scheduler line is listed, and at least as many worker
      lines as there are nodes,
    - `check_submission_works` passes for the deployment's client,
    - there is one diagnostics address per node plus one more, and each
      of them responds with a content type containing `text/html`,
    - `open_all` opens exactly the diagnostics addresses, as captured
      by `save_opened_in`.

    The node is yielded while the block is still open. After the block
    is left, the scheduler and worker listings are retried until
    `check_no_output` passes for both.

    NOTE(review): this is a generator function; presumably it is wrapped
    as a context manager by a decorator outside this view - confirm.

    :param nodes: Allocated nodes to deploy Dask on.

    """
    worker_ps_command = "ps -u $USER | grep [d]ask-worker ; exit 0"
    scheduler_ps_command = "ps -u $USER | grep [d]ask-scheduler ; exit 0"

    node = nodes[0]
    nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
    assert nodes.running()

    def assert_no_scheduler_process():
        return check_no_output(node=node, command=scheduler_ps_command)

    def assert_no_worker_process():
        return check_no_output(node=node, command=worker_ps_command)

    listed = node.run(worker_ps_command).splitlines()
    pprint(listed)
    assert not listed

    deployment = deploy_dask(nodes=nodes)
    with cancel_on_exit(deployment):
        print(deployment)
        assert str(deployment) == repr(deployment)

        listed = node.run(scheduler_ps_command).splitlines()
        pprint(listed)
        assert len(listed) == 1

        listed = node.run(worker_ps_command).splitlines()
        pprint(listed)
        # some workers may have been redeployed
        assert len(listed) >= len(nodes)

        client = deployment.get_client()
        print(client)

        check_submission_works(node=node, client=client)

        pprint(deployment.diagnostics.addresses)
        assert len(deployment.diagnostics.addresses) == len(nodes) + 1

        for address in deployment.diagnostics.addresses:
            response = requests.get(address)
            assert "text/html" in response.headers['Content-type']

        opened = []
        with save_opened_in(opened):
            deployment.diagnostics.open_all()

        assert opened == deployment.diagnostics.addresses

        yield node

    # The listings may still show processes for a moment after leaving
    # the block above, hence the retries.
    retry(assert_no_scheduler_process,
          retries=5 * get_testing_process_count(),
          seconds_between_retries=1)

    retry(assert_no_worker_process,
          retries=5 * get_testing_process_count(),
          seconds_between_retries=1)
def test_allocation_should_default_to_port_22_if_port_info_file_is_missing():
    """Allocates nodes, removes the files matching
        `~/.idact/sshd_ports/alloc-*/*` through the access node,
        and expects the first allocated node to report port 22."""
    user = USER_61

    with ExitStack() as stack:
        for manager in (disable_pytest_stdin,
                        lambda: set_up_key_location(user),
                        lambda: reset_environment(user),
                        lambda: set_password(get_test_user_password(user))):
            # Each manager is created right before it is entered,
            # in the order listed above.
            stack.enter_context(manager())

        cluster = show_cluster(name=TEST_CLUSTER)

        access_node = cluster.get_access_node()
        nodes = cluster.allocate_nodes(memory_per_node=MiB(100))
        stack.enter_context(cancel_on_exit(nodes))

        def remove_port_info_files():
            return access_node.run("rm ~/.idact/sshd_ports/alloc-*/*")

        # Retried, presumably because the files may not exist yet
        # right after the allocation request - TODO confirm.
        retry(remove_port_info_files,
              retries=SLURM_WAIT_TIMEOUT,
              seconds_between_retries=1)

        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert nodes.running()

        assert nodes[0].port == 22
def run_tunnel_test(user: str, nodes: Nodes):
    """Runs a tunneling test on the first of the allocated nodes.

    Starts a dummy server on a free remote port, opens a tunnel to it
    from a free local port, and performs an HTTP request to the local
    address, expecting a content type containing `text/html`.
    Then opens an ssh tunnel and checks its string representation
    and target port.

    After the exit stack is closed, the nodes are expected not to be
    running, and both waiting for them and opening a tunnel are expected
    to raise `RuntimeError`.

    :param user: Test user.

    :param nodes: Allocated nodes. The first one is tunneled to.

    """
    node = nodes[0]
    nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
    assert nodes.running()

    with ExitStack() as stack:
        stack.enter_context(cancel_on_exit(nodes))

        there = get_free_remote_port(node=nodes[0])
        here = get_free_local_port()

        dummy_server = start_dummy_server_thread(user=user,
                                                 server_port=there)
        stack.enter_context(join_on_exit(dummy_server))

        tunnel = node.tunnel(there=there, here=here)
        stack.enter_context(close_tunnel_on_exit(tunnel))

        print(tunnel)
        assert str(tunnel) == repr(tunnel)

        assert tunnel.here == here
        assert tunnel.there == there

        def access_dummy_server():
            local_address = "http://127.0.0.1:{local_port}".format(
                local_port=here)
            return requests.get(local_address)

        response = retry(access_dummy_server,
                         retries=5 * get_testing_process_count(),
                         seconds_between_retries=2)
        assert "text/html" in response.headers['Content-type']

        ssh_tunnel = node.tunnel_ssh()
        stack.enter_context(close_tunnel_on_exit(ssh_tunnel))

        assert str(ssh_tunnel) == repr(ssh_tunnel)
        assert str(ssh_tunnel).startswith("ssh ")
        assert user in str(ssh_tunnel)
        assert str(ssh_tunnel.here) in str(ssh_tunnel)
        assert ssh_tunnel.there == node.port

    # Everything registered on the stack has been exited at this point.
    assert not nodes.running()

    with pytest.raises(RuntimeError):
        nodes.wait()

    with pytest.raises(RuntimeError):
        node.tunnel(there=there, here=here)
def run_tunnel_test_for_bindings(user: str, bindings: List[Binding]):
    """Runs a tunneling test for a binding sequence.

    Builds a tunnel for the bindings, starts a dummy server thread
    on the port of the last binding, and finally performs an HTTP
    request to the local address on the port of the first binding,
    expecting a content type containing `text/html`.

    :param user: Test user.

    :param bindings: Sequence of tunnel bindings.

    """
    first_binding = bindings[0]
    last_binding = bindings[-1]

    config = ClusterConfigImpl(host=get_testing_host(),
                               port=get_testing_port(),
                               user=user,
                               auth=AuthMethod.ASK)

    local_port = first_binding.port
    server_port = last_binding.port

    with ExitStack() as stack:
        tunnel = build_tunnel(config=config,
                              bindings=bindings,
                              ssh_password=get_test_user_password(user))
        stack.enter_context(close_tunnel_on_exit(tunnel))

        dummy_server = start_dummy_server_thread(user=user,
                                                 server_port=server_port)
        stack.enter_context(join_on_exit(dummy_server))

        assert tunnel.here == local_port
        assert tunnel.there == server_port

        def access_dummy_server():
            local_address = "http://127.0.0.1:{local_port}".format(
                local_port=local_port)
            return requests.get(local_address)

        response = retry(access_dummy_server,
                         retries=5 * get_testing_process_count(),
                         seconds_between_retries=2)
        assert "text/html" in response.headers['Content-type']
def test_second_try_succeeds_some_retries():
    """Expects the value 123 when the task is set to fail once,
        and three retries are allowed. One failure should be recorded."""
    recorded = []

    def attempt():
        return failing_task(fail_times=1, failures=recorded)

    result = retry(fun=attempt,
                   retries=3,
                   seconds_between_retries=0)

    assert result == 123
    assert recorded == [0]
def test_first_try_succeeds_one_retry():
    """Expects the value 123 when the task is set not to fail,
        and one retry is allowed. No failures should be recorded."""
    recorded = []

    def attempt():
        return failing_task(fail_times=0, failures=recorded)

    result = retry(fun=attempt,
                   retries=1,
                   seconds_between_retries=0)

    assert result == 123
    assert recorded == []