def test_prepare_archived(ext, test_repo):
    """prepare() must refuse to run a job whose workspace has been archived.

    An archive file named after the workspace (with any supported extension,
    parametrized via `ext`) under HIGH_PRIVACY_ARCHIVE_DIR marks the
    workspace as archived.
    """
    job = JobDefinition(
        id="test_prepare_archived",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    api = local.LocalDockerAPI()

    # Create the archive marker file; its mere existence should block prepare()
    archive = (config.HIGH_PRIVACY_ARCHIVE_DIR / job.workspace).with_suffix(ext)
    archive.parent.mkdir(parents=True, exist_ok=True)
    archive.write_text("I exist")

    status = api.prepare(job)

    assert status.state == ExecutorState.ERROR
    # Bug fix: the original `assert "has been archived"` asserted a non-empty
    # string literal, which is always truthy and could never fail. Check the
    # error message content instead.
    assert "has been archived" in status.message
def test_prepare_already_prepared(docker_cleanup, test_repo, volume_api):
    """If the job's volume already exists, prepare() reports PREPARED at once."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test_prepare_already_prepared",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    # Pre-create the volume so the job looks already prepared.
    volume_api.create_volume(job)

    status = local.LocalDockerAPI().prepare(job)

    assert status.state == ExecutorState.PREPARED
def test_delete_files_error(tmp_work_dir):
    """delete_files() returns the names of files it failed to delete."""
    # use the fact that unlink() on a directory raises an error
    populate_workspace("test", "bad/_")

    api = local.LocalDockerAPI()
    errors = api.delete_files("test", Privacy.HIGH, ["bad"])

    # "bad" is a directory, so deleting it must fail and be reported back
    assert errors == ["bad"]
def test_finalize_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """End-to-end happy path: prepare -> execute -> finalize, then check that
    outputs are matched to the configured privacy levels in the results."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test_finalize_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        # the job creates one file for each output pattern below
        args=[
            "touch", "/workspace/output/output.csv", "/workspace/output/summary.csv"
        ],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()

    status = api.prepare(job)
    assert status.state == ExecutorState.PREPARING
    status = api.execute(job)
    assert status.state == ExecutorState.EXECUTING

    wait_for_state(api, job, ExecutorState.EXECUTED)

    status = api.finalize(job)
    assert status.state == ExecutorState.FINALIZING

    # we don't need to wait
    assert api.get_status(job).state == ExecutorState.FINALIZED
    assert job.id in local.RESULTS

    # for test debugging if any asserts fail
    print(get_log(job))
    results = api.get_results(job)
    assert results.exit_code == 0
    # each created file should be matched to its output_spec privacy level
    assert results.outputs == {
        "output/output.csv": "high_privacy",
        "output/summary.csv": "medium_privacy",
    }
    assert results.unmatched_patterns == []
def test_finalize_failed_oomkilled(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """A job killed by the OOM killer finalizes with exit code 137 and an
    out-of-memory message that includes the (formatted) memory limit."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        # Consistency fix: the id previously was "test_finalize_failed", which
        # neither matched this test's name nor was unique (test_finalize_failed_137
        # used the same id). Unique ids keep container/volume names distinct.
        id="test_finalize_failed_oomkilled",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        # Consume memory by writing to the tmpfs at /dev/shm
        # We write a lot more that our limit, to ensure the OOM killer kicks in
        # regardless of our tests host's vm.overcommit_memory settings.
        args=["sh", "-c", "head -c 100m /dev/urandom >/dev/shm/foo"],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
        memory_limit="6M",  # lowest allowable limit
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()

    status = api.prepare(job)
    assert status.state == ExecutorState.PREPARING
    status = api.execute(job)
    assert status.state == ExecutorState.EXECUTING

    wait_for_state(api, job, ExecutorState.EXECUTED)

    status = api.finalize(job)
    assert status.state == ExecutorState.FINALIZING

    # we don't need to wait
    assert api.get_status(job).state == ExecutorState.FINALIZED
    assert job.id in local.RESULTS
    # 137 = 128 + SIGKILL, the exit code docker reports for an OOM kill
    assert local.RESULTS[job.id].exit_code == 137
    # Note, 6MB is rounded to 0.01GBM by the formatter
    assert (
        local.RESULTS[job.id].message
        == "Ran out of memory (limit for this job was 0.01GB)"
    )
def test_finalize_failed_137(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """A container killed externally (simulating an admin `docker kill`)
    finalizes with exit code 137 and the admin-kill message."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        # Consistency fix: the id previously was "test_finalize_failed", which
        # neither matched this test's name nor was unique (the oomkilled test
        # used the same id). Unique ids keep container/volume names distinct.
        id="test_finalize_failed_137",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        # sleep long enough that we can kill the container while it runs
        args=["sleep", "101"],
        env={},
        inputs=["output/input.csv"],
        output_spec={
            "output/output.*": "high_privacy",
            "output/summary.*": "medium_privacy",
        },
        allow_database_access=False,
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()

    status = api.prepare(job)
    assert status.state == ExecutorState.PREPARING
    status = api.execute(job)
    assert status.state == ExecutorState.EXECUTING

    # impersonate an admin
    docker.kill(local.container_name(job))

    wait_for_state(api, job, ExecutorState.EXECUTED)

    status = api.finalize(job)
    assert status.state == ExecutorState.FINALIZING

    # we don't need to wait
    assert api.get_status(job).state == ExecutorState.FINALIZED
    assert job.id in local.RESULTS
    assert local.RESULTS[job.id].exit_code == 137
    assert local.RESULTS[job.id].message == "Killed by an OpenSAFELY admin"
def test_prepare_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """prepare() creates the job volume and copies in the repo files and
    declared inputs (plus the timestamp reference file)."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test-id",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        # two globs so we can check both root-level and nested files below
        output_spec={
            "*": "medium",
            "**/*": "medium",
        },
        allow_database_access=False,
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()
    status = api.prepare(job)

    assert status.state == ExecutorState.PREPARING

    # we don't need to wait for this is currently synchronous
    assert api.get_status(job).state == ExecutorState.PREPARED

    assert volume_api.volume_exists(job)

    # check files have been copied
    expected = set(list_repo_files(test_repo.source) + job.inputs)
    expected.add(local.TIMESTAMP_REFERENCE_FILE)

    # glob_volume_files uses find, and its '**/*' regex doesn't find files in
    # the root dir, which is arguably correct.
    files = volume_api.glob_volume_files(job)
    all_files = set(files["*"] + files["**/*"])
    assert all_files == expected
def test_execute_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """execute() starts the container with the job's cpu and memory limits
    applied to the docker HostConfig."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test_execute_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
        cpu_count=1.5,
        memory_limit="1G",
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()

    # use prepare step as test set up
    status = api.prepare(job)
    assert status.state == ExecutorState.PREPARING

    status = api.execute(job)
    assert status.state == ExecutorState.EXECUTING

    # could be in either state
    assert api.get_status(job).state in (
        ExecutorState.EXECUTING,
        ExecutorState.EXECUTED,
    )

    container_data = docker.container_inspect(local.container_name(job), "HostConfig")
    # docker expresses cpu limits in units of 1e-9 cpus
    assert container_data["NanoCpus"] == int(1.5 * 1e9)
    assert container_data["Memory"] == 2**30  # 1G
def test_prepare_no_image(docker_cleanup, test_repo, volume_api):
    """prepare() errors when the job's docker image cannot be found,
    and names the missing image in its message."""
    job = JobDefinition(
        id="test_prepare_no_image",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="invalid-test-image",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    status = local.LocalDockerAPI().prepare(job)

    assert status.state == ExecutorState.ERROR
    assert job.image in status.message.lower()
def test_delete_files_success(tmp_work_dir):
    """Deleting a file at one privacy level leaves the same-named file
    at the other privacy level untouched."""
    high_file = populate_workspace("test", "file.txt")
    medium_file = populate_workspace("test", "file.txt", privacy="medium")

    assert high_file.exists()
    assert medium_file.exists()

    api = local.LocalDockerAPI()

    # on windows, we cannot always delete, so check we tried to delete it
    failed = api.delete_files("test", Privacy.HIGH, ["file.txt"])
    if failed:
        assert failed == ["file.txt"]
    else:
        assert not high_file.exists()
        assert medium_file.exists()

    failed = api.delete_files("test", Privacy.MEDIUM, ["file.txt"])
    if failed:
        assert failed == ["file.txt"]
    else:
        assert not medium_file.exists()
def test_cleanup_success(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """cleanup() removes the job's container and volume, after which the job's
    state is UNKNOWN."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test_cleanup_success",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    populate_workspace(job.workspace, "output/input.csv")

    api = local.LocalDockerAPI()
    api.prepare(job)
    api.execute(job)

    container = local.container_name(job)
    # sanity-check that prepare/execute actually created the resources
    assert volume_api.volume_exists(job)
    assert docker.container_exists(container)

    status = api.cleanup(job)
    assert status.state == ExecutorState.UNKNOWN

    status = api.get_status(job)
    assert status.state == ExecutorState.UNKNOWN

    assert not volume_api.volume_exists(job)
    assert not docker.container_exists(container)
def test_execute_not_prepared(docker_cleanup, test_repo, tmp_work_dir, volume_api):
    """Calling execute() without a prior prepare() leaves the job UNKNOWN
    (the main loop is responsible for turning this into an error)."""
    ensure_docker_images_present("busybox")

    job = JobDefinition(
        id="test_execute_not_prepared",
        job_request_id="test_request_id",
        study=test_repo.study,
        workspace="test",
        action="action",
        created_at=int(time.time()),
        image="ghcr.io/opensafely-core/busybox",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    status = local.LocalDockerAPI().execute(job)

    # this will be turned into an error by the loop
    assert status.state == ExecutorState.UNKNOWN
def test_delete_files_bad_privacy(tmp_work_dir):
    """delete_files() raises when given an invalid privacy level."""
    populate_workspace("test", "file.txt")

    api = local.LocalDockerAPI()
    with pytest.raises(Exception):
        api.delete_files("test", None, ["file.txt"])