def test_job_save(helper: Helper, docker: aiodocker.Docker) -> None:
    """Save a running job as an image, then run a new job from that image.

    The first job writes ``123`` into ``/test`` and keeps sleeping so that
    ``job save`` can snapshot it.  After that job is killed, a second job is
    started from the saved image and must find the same content in ``/test``.

    The ``docker`` fixture is requested but not referenced in the body.
    """
    job_name = "test-job-save-" + uuid4().hex[:6]
    image = f"test-image:{job_name}"
    remote_uri = f"image://{helper.cluster_name}/{helper.username}/{image}"

    source_job_id = helper.run_job_and_wait_state(
        ALPINE_IMAGE_NAME,
        command="sh -c 'echo -n 123 > /test; sleep 10m'",
        wait_state=JobStatus.RUNNING,
    )

    out = helper.run_cli(["job", "save", source_job_id, remote_uri]).out

    # Every progress message of `job save` must show up in the output.
    expected_fragments = (
        f"Saving job '{source_job_id}' to image '{remote_uri}'...",
        f"Using remote image '{remote_uri}'",
        "Creating image from the job container",
        "Image created",
        f"Using local image '{helper.username}/{image}'",
        "Pushing image...",
    )
    for fragment in expected_fragments:
        assert fragment in out
    # The very last thing printed is the URI of the saved image.
    assert out.endswith(remote_uri)

    # Stop the source job and wait for the cancellation to be registered
    # before starting the verification job.
    helper.run_cli(["job", "kill", source_job_id])
    helper.wait_job_change_state_to(source_job_id, JobStatus.CANCELLED)

    # The job succeeds only if the file written by the first job is present
    # in the saved image with the same content.
    helper.run_job_and_wait_state(
        remote_uri,
        command='sh -c \'[ "$(cat /test)" = "123" ]\'',
        wait_state=JobStatus.SUCCEEDED,
    )
def test_job_run_volume_all(helper: Helper) -> None:
    """Check that ``--volume=ALL`` mounts storage and exports its mountpoints.

    The job command verifies that the user's directory and the ``neuromation``
    directory exist under the root mountpoint and that ``NEUROMATION_ROOT`` /
    ``NEUROMATION_HOME`` point at them.  Without ``--volume=ALL`` the CLI call
    is expected to fail with return code 1; with it, the job must succeed.
    """
    root_mountpoint = "/var/neuro"
    checks = [
        f"[ -d {root_mountpoint}/{helper.username} ]",
        f"[ -d {root_mountpoint}/neuromation ]",  # must be public
        f"[ $NEUROMATION_ROOT == {root_mountpoint} ]",
        f"[ $NEUROMATION_HOME == {root_mountpoint}/{helper.username} ]",
    ]
    command = "bash -c '" + " && ".join(checks) + "'"
    img = UBUNTU_IMAGE_NAME

    # First, run without --volume=ALL: the CLI call itself must fail.
    with pytest.raises(subprocess.CalledProcessError) as exc_info:
        helper.run_cli(["--quiet", "run", "-T", img, command])
    assert exc_info.value.returncode == 1

    # Then, run with --volume=ALL.
    captured = helper.run_cli(["run", "-T", "--volume=ALL", img, command])
    expected_notice = "\n".join(
        [
            "Storage mountpoints will be available as the environment variables:",
            f"  NEUROMATION_ROOT={root_mountpoint}",
            f"  NEUROMATION_HOME={root_mountpoint}/{helper.username}",
        ]
    )
    assert expected_notice in captured.out

    # Exactly one job ID must be reported in the output.
    found_job_ids = re.findall(r"Job ID: (job-.+)", captured.out)
    assert len(found_job_ids) == 1
    helper.wait_job_change_state_to(
        found_job_ids[0], JobStatus.SUCCEEDED, stop_state=JobStatus.FAILED
    )
def test_job_run_home_volumes_automount(helper: Helper,
                                        fakebrowser: Any) -> None:
    """Check that ``--volume HOME`` makes the storage directories available.

    The job command tests for ``/var/storage/home`` and
    ``/var/storage/neuromation``.  Without the ``--volume`` option the CLI call
    is expected to fail with return code 125; with ``--volume HOME`` the job
    must finish as SUCCEEDED.

    The ``fakebrowser`` fixture is requested but not referenced in the body.
    """
    command = "[ -d /var/storage/home -a -d /var/storage/neuromation ]"
    run_prefix = ["-q", "job", "run", "--detach", "--preset=cpu-micro"]
    run_suffix = [UBUNTU_IMAGE_NAME, command]

    # First, run without --volume=HOME.
    with pytest.raises(subprocess.CalledProcessError) as exc_info:
        helper.run_cli([*run_prefix, *run_suffix])
    assert exc_info.value.returncode == 125

    # Then, run with --volume=HOME; in quiet mode the output is the job ID.
    result = helper.run_cli([*run_prefix, "--volume", "HOME", *run_suffix])
    helper.wait_job_change_state_to(
        result.out, JobStatus.SUCCEEDED, JobStatus.FAILED
    )
def test_pass_config(image: str, helper: Helper) -> None:
    """Run ``neuro config show`` inside a job started with ``--pass-config``.

    The image is pushed first; a job is then started from it with the local
    configuration passed in, and the test expects the job to succeed with
    exit code 0.
    """
    # Push the image; the CLI output must end with the remote image URI.
    push_result = helper.run_cli(["image", "push", image])
    image_full_str = f"image://{helper.username}/{image}"
    assert push_result.out.endswith(image_full_str)

    # Start the job in quiet mode, so the output is the bare job ID.
    run_args = [
        "job",
        "run",
        "-q",
        "-s",
        JOB_TINY_CONTAINER_PRESET,
        "--no-wait-start",
        "--pass-config",
        image_full_str,
        'bash -c "neuro config show"',
    ]
    job_id = helper.run_cli(run_args).out

    # Wait until the job has finished successfully.
    helper.wait_job_change_state_to(job_id, JobStatus.SUCCEEDED)

    # Verify that the exit code is reported in the job status.
    status_out = helper.run_cli(["job", "status", job_id]).out
    assert "Exit code: 0" in status_out
def test_job_description(helper: Helper) -> None:
    """Check that a job description is listed by ``job ls`` unless ``-q`` is set.

    A long-sleeping job is submitted with ``--description``.  The test checks
    that the job is new, shows up among running/pending jobs, and that both
    the description and the command appear in the normal listing but not in
    the quiet one.

    The job is killed in a ``finally`` clause so that a failing assertion does
    not leave the 10-minute job running.
    """

    def active_job_ids() -> list:
        """Return the first column of ``job ls`` for running/pending jobs."""
        listing = helper.run_cli(
            ["job", "ls", "--status", "running", "--status", "pending"]
        )
        # The first output line is skipped (treated as the table header);
        # columns are separated by two spaces.
        return [row.split("  ")[0] for row in listing.out.split("\n")[1:]]

    # Remember the jobs that were active before this test started.
    jobs_orig = active_job_ids()
    description = "Test description for a job"

    # Run a new job.
    command = "bash -c 'sleep 10m; false'"
    captured = helper.run_cli(
        [
            "job",
            "submit",
            *JOB_TINY_CONTAINER_PARAMS,
            "--http",
            "80",
            "--description",
            description,
            "--non-preemptible",
            "--no-wait-start",
            UBUNTU_IMAGE_NAME,
            command,
        ]
    )
    match = re.match("Job ID: (.+)", captured.out)
    assert match is not None
    job_id = match.group(1)

    try:
        # Check that the job is new.
        assert job_id.startswith("job-")
        assert job_id not in jobs_orig

        # Check that it is in the running/pending job list now.
        assert job_id in active_job_ids()

        # Wait until the job is running (stop waiting if it fails).
        helper.wait_job_change_state_to(
            job_id, JobStatus.RUNNING, JobStatus.FAILED
        )

        # The regular listing shows the job with its description and command.
        store_out = helper.run_cli(["job", "ls", "--status", "running"]).out
        assert job_id in store_out
        assert description in store_out
        assert command in store_out

        # The quiet listing shows the job ID only.
        store_out = helper.run_cli(
            ["-q", "job", "ls", "--status", "running"]
        ).out
        assert job_id in store_out
        assert description not in store_out
        assert command not in store_out
    finally:
        # Always release the job, even when an assertion above fails.
        helper.kill_job(job_id, wait=False)
def test_job_run_exit_code(helper: Helper) -> None:
    """A job whose command exits with 101 must fail and report that code."""
    run_args = [
        "-q",
        "job",
        "run",
        "--no-wait-start",
        UBUNTU_IMAGE_NAME,
        'bash -c "exit 101"',
    ]
    # In quiet mode the output of `job run` is used as the job ID.
    job_id = helper.run_cli(run_args).out

    # Wait until the job has failed.
    helper.wait_job_change_state_to(job_id, JobStatus.FAILED)

    # Verify that the exit code is shown in the job status.
    status_out = helper.run_cli(["job", "status", job_id]).out
    assert "Exit code: 101" in status_out
def test_pass_config(helper: Helper) -> None:
    """Check that ``--pass-config`` makes a ``db`` file available in the job.

    The job sleeps for 15 seconds and then tests for a ``db`` file under the
    location named by ``NEURO_STEAL_CONFIG``; the test passes when the job
    reaches SUCCEEDED.
    """
    # NOTE(review): in bash `$(NAME)` is command substitution, not variable
    # expansion; presumably the platform substitutes `$(NEURO_STEAL_CONFIG)`
    # before bash sees the command -- confirm.
    job_command = 'bash -c "sleep 15 && test -f $(NEURO_STEAL_CONFIG)/db"'
    run_args = [
        "-q",
        "job",
        "run",
        "--no-wait-start",
        "--pass-config",
        UBUNTU_IMAGE_NAME,
        job_command,
    ]
    job_id = helper.run_cli(run_args).out

    # Waiting stops early on FAILED, i.e. when the "test -f ..." check fails.
    helper.wait_job_change_state_to(
        job_id, JobStatus.SUCCEEDED, stop_state=JobStatus.FAILED
    )
def test_e2e_restart_failing(request: Any, helper: Helper) -> None:
    """Check a job that runs ``false`` with the ``on-failure`` restart policy.

    The job status must show the restart policy, and the job is expected to
    reach RUNNING and still be RUNNING one second later.
    """
    run_args = [
        "-q",
        "job",
        "run",
        "--restart",
        "on-failure",
        "--detach",
        UBUNTU_IMAGE_NAME,
        "false",
    ]
    job_id = helper.run_cli(run_args).out

    def _kill_job() -> None:
        # Registered as a finalizer so the job is killed however the test ends.
        helper.kill_job(job_id, wait=False)

    request.addfinalizer(_kill_job)

    status_lines = helper.run_cli(["job", "status", job_id]).out.splitlines()
    assert "Restart policy: on-failure" in status_lines

    helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)
    # Give the job a moment, then make sure it is still reported as running.
    sleep(1)
    helper.assert_job_state(job_id, JobStatus.RUNNING)
def test_job_description(helper: Helper) -> None:
    """Check that ``job ls --format`` shows the description given at job start.

    A long-sleeping job is started with a unique (UUID) description; once it
    is running, a listing formatted as ``{id}, {description}`` must contain
    both the job ID and the description.

    The job is killed in a ``finally`` clause so that a failing assertion does
    not leave the 15-minute job running.
    """
    description = str(uuid4())

    # Run a new job.
    command = "bash -c 'sleep 15m; false'"
    captured = helper.run_cli(
        [
            "job",
            "run",
            "--http",
            "80",
            "--description",
            description,
            "--no-wait-start",
            UBUNTU_IMAGE_NAME,
            command,
        ]
    )
    match = re.match("Job ID: (.+)", captured.out)
    assert match is not None
    job_id = match.group(1)

    try:
        assert job_id.startswith("job-")

        # Wait until the job is running (stop waiting if it fails).
        helper.wait_job_change_state_to(
            job_id, JobStatus.RUNNING, JobStatus.FAILED
        )

        # Check that the job and its description are in the running job list.
        captured = helper.run_cli(
            [
                "job",
                "ls",
                "--status",
                "running",
                "--format",
                "{id}, {description}",
            ]
        )
        store_out = captured.out
        assert job_id in store_out
        assert description in store_out
    finally:
        # Always release the job, even when an assertion above fails.
        helper.kill_job(job_id, wait=False)
def test_job_run(helper: Helper) -> None:
    """Start a named job with ``job run`` and check that it is listed.

    The command is passed as separate arguments (``bash``, ``-c``, script) to
    check that the CLI joins them into a properly quoted command line.  The
    job must be new, appear among running/pending jobs, reach RUNNING, and be
    listed together with its command.

    The job is killed in a ``finally`` clause so that a failing assertion does
    not leave the 10-minute job running.
    """

    def active_job_ids() -> list:
        """Return the first column of ``job ls`` for running/pending jobs."""
        listing = helper.run_cli(
            ["job", "ls", "--status", "running", "--status", "pending"]
        )
        # The first output line is skipped (treated as the table header);
        # columns are separated by two spaces.
        return [row.split("  ")[0] for row in listing.out.split("\n")[1:]]

    job_name = f"test-job-{os.urandom(5).hex()}"

    # Kill any other active job with the same name.
    # Pass --owner because --name without --owner is too slow for admin users.
    captured = helper.run_cli(
        ["-q", "job", "ls", "--owner", helper.username, "--name", job_name]
    )
    if captured.out:
        jobs_same_name = captured.out.split("\n")
        assert len(jobs_same_name) == 1, f"found multiple active jobs named {job_name}"
        helper.kill_job(jobs_same_name[0])

    # Remember the jobs that were active before this test started.
    jobs_orig = active_job_ids()

    captured = helper.run_cli(
        [
            "job",
            "run",
            "--http",
            "80",
            "--no-wait-start",
            "--restart",
            "never",
            "--name",
            job_name,
            UBUNTU_IMAGE_NAME,
            # use unrolled notation to check shlex.join()
            "bash",
            "-c",
            "sleep 10m; false",
        ]
    )
    match = re.match("Job ID: (.+)", captured.out)
    assert match is not None
    job_id = match.group(1)

    try:
        assert job_id.startswith("job-")
        assert job_id not in jobs_orig
        assert f"Name: {job_name}" in captured.out

        # Check that it is in the running/pending job list now.
        assert job_id in active_job_ids()

        # Wait until the job is running.
        helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)

        # Check that the job and its command are in the running job list.
        store_out = helper.run_cli(["job", "ls", "--status", "running"]).out
        assert job_id in store_out
        assert "bash -c 'sleep 10m; false'" in store_out
    finally:
        # Always release the job, even when an assertion above fails.
        helper.kill_job(job_id, wait=False)
# Example no. 11
# 0
def test_job_lifecycle(helper: Helper) -> None:
    """Walk a named job through submit, listing, kill-by-name and status.

    Steps, in order: make sure no active job uses the generated name, submit
    the job, check it is listed as running/pending and then as running (with
    its command unless ``-q`` is given), kill it by name, and finally check
    ``job ls`` by name and ``job status`` by both ID and name.
    """

    def cli_out(*args: str) -> str:
        """Run the CLI with ``args`` and return its captured output."""
        return helper.run_cli(list(args)).out

    def active_job_ids() -> list:
        """Return the first column of ``job ls`` for running/pending jobs."""
        listing = cli_out(
            "job", "ls", "--status", "running", "--status", "pending"
        )
        # The first output line is skipped (treated as the table header);
        # columns are separated by two spaces.
        return [row.split("  ")[0] for row in listing.split("\n")[1:]]

    job_name = f"job-{os.urandom(5).hex()}"

    # Kill any other active job with the same name.
    same_name_out = cli_out("-q", "job", "ls", "--name", job_name)
    if same_name_out:
        jobs_same_name = same_name_out.split("\n")
        assert len(jobs_same_name) == 1, f"found multiple active jobs named {job_name}"
        stale_job_id = jobs_same_name[0]
        helper.run_cli(["job", "kill", job_name])
        helper.wait_job_change_state_from(stale_job_id, JobStatus.RUNNING)
        assert not cli_out("-q", "job", "ls", "--name", job_name)

    # Remember the jobs that were active before this test started.
    jobs_orig = active_job_ids()

    command = 'bash -c "sleep 10m; false"'
    submit_out = cli_out(
        "job",
        "submit",
        *JOB_TINY_CONTAINER_PARAMS,
        "--http",
        "80",
        "--non-preemptible",
        "--no-wait-start",
        "--name",
        job_name,
        UBUNTU_IMAGE_NAME,
        command,
    )
    match = re.match("Job ID: (.+) Status:", submit_out)
    assert match is not None
    job_id = match.group(1)
    assert job_id.startswith("job-")
    assert job_id not in jobs_orig
    assert f"Name: {job_name}" in submit_out
    assert re.search("Http URL: http", submit_out), submit_out

    # Check that it is in the running/pending job list now.
    assert job_id in active_job_ids()

    # Wait until the job is running.
    helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)

    # The regular listing shows the job together with its command.
    running_out = cli_out("job", "ls", "--status", "running")
    assert job_id in running_out
    assert command in running_out

    # The quiet listing shows the job but not its command.
    quiet_out = cli_out("-q", "job", "ls", "--status", "running")
    assert job_id in quiet_out
    assert command not in quiet_out

    # Kill the job by name.
    helper.run_cli(["job", "kill", job_name])

    # Currently we check that the job is not running anymore
    # TODO(adavydow): replace with a succeeded check when the race condition
    # in platform-api is fixed.
    helper.wait_job_change_state_from(job_id, JobStatus.RUNNING)

    # Check that it is not in the running job list anymore.
    assert job_id not in cli_out("job", "ls", "--status", "running")

    # Check job ls by name.
    by_name_out = cli_out("job", "ls", "-n", job_name, "-s", "succeeded")
    assert job_id in by_name_out
    assert job_name in by_name_out

    expected_header = f"Job: {job_id}\nName: {job_name}"

    # Check job status by id.
    assert cli_out("job", "status", job_id).startswith(expected_header)

    # Check job status by name.
    assert cli_out("job", "status", job_name).startswith(expected_header)
# Example no. 12
# 0
def test_job_description(helper: Helper) -> None:
    """Check that a job description is listed by ``job ls`` unless ``-q`` is set.

    A long-sleeping job is submitted with ``--description``; the test checks
    that the job is new, appears among running/pending jobs, is listed with
    its description and command (but not in quiet mode), and disappears from
    the running list after being killed.
    """

    def cli_out(*args: str) -> str:
        """Run the CLI with ``args`` and return its captured output."""
        return helper.run_cli(list(args)).out

    def active_job_ids() -> list:
        """Return the first column of ``job ls`` for running/pending jobs."""
        listing = cli_out(
            "job", "ls", "--status", "running", "--status", "pending"
        )
        # The first output line is skipped (treated as the table header);
        # columns are separated by two spaces.
        return [row.split("  ")[0] for row in listing.split("\n")[1:]]

    # Remember the jobs that were active before this test started.
    jobs_orig = active_job_ids()
    description = "Test description for a job"

    # Run a new job.
    command = 'bash -c "sleep 10m; false"'
    submit_out = cli_out(
        "job",
        "submit",
        *JOB_TINY_CONTAINER_PARAMS,
        "--http",
        "80",
        "--description",
        description,
        "--non-preemptible",
        "--no-wait-start",
        UBUNTU_IMAGE_NAME,
        command,
    )
    match = re.match("Job ID: (.+) Status:", submit_out)
    assert match is not None
    job_id = match.group(1)

    # Check that the job is new.
    assert job_id.startswith("job-")
    assert job_id not in jobs_orig

    # Check that it is in the running/pending job list now.
    assert job_id in active_job_ids()

    # Wait until the job is running (stop waiting if it fails).
    helper.wait_job_change_state_to(
        job_id, JobStatus.RUNNING, JobStatus.FAILED
    )

    # The regular listing shows the job with its description and command.
    running_out = cli_out("job", "ls", "--status", "running")
    assert job_id in running_out
    assert description in running_out
    assert command in running_out

    # The quiet listing shows the job ID only.
    quiet_out = cli_out("-q", "job", "ls", "--status", "running")
    assert job_id in quiet_out
    assert description not in quiet_out
    assert command not in quiet_out

    # Kill the job.
    helper.run_cli(["job", "kill", job_id])

    # Currently we check that the job is not running anymore
    # TODO(adavydow): replace with a succeeded check when the race condition
    # in platform-api is fixed.
    helper.wait_job_change_state_from(job_id, JobStatus.RUNNING)

    # Check that it is not in the running job list anymore.
    assert job_id not in cli_out("job", "ls", "--status", "running")