def test_prepare_archived(ext, test_repo):
    """prepare() must report an error when the workspace archive file exists.

    An archive file named after the job's workspace (with suffix ``ext``) is
    written into ``config.HIGH_PRIVACY_ARCHIVE_DIR`` before ``prepare`` is
    called; the returned status must be ERROR and its message must mention
    that the workspace has been archived.
    """
    job = JobDefinition(
        id="test_prepare_archived",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    api = local.LocalDockerAPI()

    # Fake an archived workspace: only the existence of the file matters here.
    archive = (config.HIGH_PRIVACY_ARCHIVE_DIR / job.workspace).with_suffix(ext)
    archive.parent.mkdir(parents=True, exist_ok=True)
    archive.write_text("I exist")

    status = api.prepare(job)

    assert status.state == ExecutorState.ERROR
    # A bare `assert "has been archived"` is always true (non-empty string);
    # the message itself has to be checked.
    assert "has been archived" in status.message
def test_prepare_already_prepared(docker_cleanup, test_repo, volume_api):
    """prepare() returns PREPARED when the job's volume was created beforehand."""
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_prepare_already_prepared",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    # The volume exists before prepare() is ever called.
    volume_api.create_volume(definition)

    executor = local.LocalDockerAPI()
    prepared = executor.prepare(definition)

    assert prepared.state == ExecutorState.PREPARED
def test_delete_files_error(tmp_work_dir):
    """delete_files() returns the names of files it failed to delete."""
    # Rely on the fact that unlink() on a directory raises an error:
    # populating "bad/_" leaves "bad" as a directory in the workspace.
    populate_workspace("test", "bad/_")

    executor = local.LocalDockerAPI()
    failed = executor.delete_files("test", Privacy.HIGH, ["bad"])

    assert failed == ["bad"]
def test_finalize_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """Run a job through prepare/execute/finalize and check its results.

    The job touches two files; each must be reported under the privacy level
    of the output pattern it matches, with no unmatched patterns and a zero
    exit code.
    """
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_finalize_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=[
            "touch",
            "/workspace/output/output.csv",
            "/workspace/output/summary.csv",
        ],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()

    assert executor.prepare(definition).state == ExecutorState.PREPARING
    assert executor.execute(definition).state == ExecutorState.EXECUTING

    wait_for_state(executor, definition, ExecutorState.EXECUTED)

    assert executor.finalize(definition).state == ExecutorState.FINALIZING

    # No waiting needed: the status is already FINALIZED at this point.
    assert executor.get_status(definition).state == ExecutorState.FINALIZED
    assert definition.id in local.RESULTS

    # Emit the job log so it is available if one of the asserts below fails.
    print(get_log(definition))
    outcome = executor.get_results(definition)
    assert outcome.exit_code == 0
    assert outcome.outputs == {
        "output/output.csv": "high_privacy",
        "output/summary.csv": "medium_privacy",
    }
    assert outcome.unmatched_patterns == []
def test_finalize_failed_oomkilled(docker_cleanup, test_repo, tmp_work_dir,
                                   volume_api):
    """A job that exceeds its memory limit is finalized as out-of-memory.

    Expects exit code 137 and a message that names the memory limit.
    """
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_finalize_failed",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        # Consume memory by writing to the tmpfs at /dev/shm.
        # We write a lot more than our limit, to ensure the OOM killer kicks
        # in regardless of the test host's vm.overcommit_memory settings.
        args=["sh", "-c", "head -c 100m /dev/urandom >/dev/shm/foo"],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
        memory_limit="6M",  # lowest allowable limit
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()

    assert executor.prepare(definition).state == ExecutorState.PREPARING
    assert executor.execute(definition).state == ExecutorState.EXECUTING

    wait_for_state(executor, definition, ExecutorState.EXECUTED)

    assert executor.finalize(definition).state == ExecutorState.FINALIZING

    # No waiting needed: the status is already FINALIZED at this point.
    assert executor.get_status(definition).state == ExecutorState.FINALIZED
    assert definition.id in local.RESULTS

    outcome = local.RESULTS[definition.id]
    assert outcome.exit_code == 137
    # Note, 6MB is rounded to 0.01GB by the formatter
    expected_message = "Ran out of memory (limit for this job was 0.01GB)"
    assert outcome.message == expected_message
def test_finalize_failed_137(docker_cleanup, test_repo, tmp_work_dir,
                             volume_api):
    """A container killed from outside is finalized as an admin kill.

    Expects exit code 137 and the message "Killed by an OpenSAFELY admin".
    """
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_finalize_failed",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["sleep", "101"],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()

    assert executor.prepare(definition).state == ExecutorState.PREPARING
    assert executor.execute(definition).state == ExecutorState.EXECUTING

    # Impersonate an admin by killing the running container directly.
    docker.kill(local.container_name(definition))

    wait_for_state(executor, definition, ExecutorState.EXECUTED)

    assert executor.finalize(definition).state == ExecutorState.FINALIZING

    # No waiting needed: the status is already FINALIZED at this point.
    assert executor.get_status(definition).state == ExecutorState.FINALIZED
    assert definition.id in local.RESULTS

    outcome = local.RESULTS[definition.id]
    assert outcome.exit_code == 137
    assert outcome.message == "Killed by an OpenSAFELY admin"
def test_prepare_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """prepare() creates the job volume and fills it with the expected files.

    The volume must contain the repo files, the job inputs and the timestamp
    reference file, and nothing else.
    """
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test-id",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "*": "medium",
            "**/*": "medium",
        },
        allow_database_access=False,
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()
    first_status = executor.prepare(definition)
    assert first_status.state == ExecutorState.PREPARING

    # prepare is currently synchronous, so there is nothing to wait for
    assert executor.get_status(definition).state == ExecutorState.PREPARED

    assert volume_api.volume_exists(definition)

    # Everything that should have been copied into the volume.
    wanted = set(list_repo_files(test_repo.source) + definition.inputs)
    wanted.add(local.TIMESTAMP_REFERENCE_FILE)

    # glob_volume_files uses find, and its '**/*' regex doesn't find files in
    # the root dir (arguably correct), so both patterns are combined here.
    matches = volume_api.glob_volume_files(definition)
    present = set(matches["*"] + matches["**/*"])
    assert present == wanted
def test_execute_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """execute() starts a container that carries the job's CPU and memory limits."""
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_execute_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
        cpu_count=1.5,
        memory_limit="1G",
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()

    # The prepare step serves as test set up here.
    assert executor.prepare(definition).state == ExecutorState.PREPARING

    assert executor.execute(definition).state == ExecutorState.EXECUTING

    # The job may or may not have finished already; either state is accepted.
    acceptable = (ExecutorState.EXECUTING, ExecutorState.EXECUTED)
    assert executor.get_status(definition).state in acceptable

    host_config = docker.container_inspect(
        local.container_name(definition), "HostConfig"
    )
    assert host_config["NanoCpus"] == int(1.5 * 1e9)
    assert host_config["Memory"] == 2**30  # 1G
def test_prepare_no_image(docker_cleanup, test_repo, volume_api):
    """prepare() returns ERROR, naming the image, for an invalid image name."""
    definition = JobDefinition(
        id="test_prepare_no_image",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="invalid-test-image",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    executor = local.LocalDockerAPI()
    outcome = executor.prepare(definition)

    assert outcome.state == ExecutorState.ERROR
    # Compared case-insensitively against the error message.
    lowered_message = outcome.message.lower()
    assert definition.image in lowered_message
def test_delete_files_success(tmp_work_dir):
    """delete_files() only removes the file at the requested privacy level."""
    high_file = populate_workspace("test", "file.txt")
    medium_file = populate_workspace("test", "file.txt", privacy="medium")

    assert high_file.exists()
    assert medium_file.exists()

    executor = local.LocalDockerAPI()

    failed = executor.delete_files("test", Privacy.HIGH, ["file.txt"])
    # On Windows we cannot always delete, so in that case just check that
    # the deletion was attempted and reported.
    if not failed:
        assert not high_file.exists()
    else:
        assert failed == ["file.txt"]
    # The medium privacy copy must be unaffected by the HIGH deletion.
    assert medium_file.exists()

    failed = executor.delete_files("test", Privacy.MEDIUM, ["file.txt"])
    if not failed:
        assert not medium_file.exists()
    else:
        assert failed == ["file.txt"]
def test_cleanup_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """cleanup() removes the job's volume and container; state becomes UNKNOWN."""
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_cleanup_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    populate_workspace(definition.workspace, "output/input.csv")

    executor = local.LocalDockerAPI()
    executor.prepare(definition)
    executor.execute(definition)

    # Both resources must exist before cleanup for the test to mean anything.
    container_id = local.container_name(definition)
    assert volume_api.volume_exists(definition)
    assert docker.container_exists(container_id)

    assert executor.cleanup(definition).state == ExecutorState.UNKNOWN
    assert executor.get_status(definition).state == ExecutorState.UNKNOWN

    assert not volume_api.volume_exists(definition)
    assert not docker.container_exists(container_id)
def test_execute_not_prepared(docker_cleanup, test_repo, tmp_work_dir,
                              volume_api):
    """execute() on a job that was never prepared returns the UNKNOWN state."""
    ensure_docker_images_present("busybox")

    definition = JobDefinition(
        id="test_execute_not_prepared",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    executor = local.LocalDockerAPI()

    # Deliberately no prepare() call before execute().
    outcome = executor.execute(definition)

    # this will be turned into an error by the loop
    assert outcome.state == ExecutorState.UNKNOWN
def test_delete_files_bad_privacy(tmp_work_dir):
    """delete_files() raises when the privacy argument is None."""
    populate_workspace("test", "file.txt")
    executor = local.LocalDockerAPI()

    with pytest.raises(Exception):
        executor.delete_files("test", None, ["file.txt"])