Example no. 1
def persist_outputs(job, outputs, container_metadata):
    """Copy logs and generated outputs to persistant storage."""
    job.completed_at = int(time.time())
    # job_metadata is a big dict capturing everything we know about the state
    # of the job
    job_metadata = dict()
    job_metadata["job_id"] = job.id
    job_metadata["job_request_id"] = job.job_request_id
    job_metadata["created_at"] = job.created_at
    job_metadata["completed_at"] = int(time.time())
    job_metadata["docker_image_id"] = container_metadata["Image"]
    # convert the exit code to str so that a 0 exit code still gets logged
    job_metadata["exit_code"] = str(container_metadata["State"]["ExitCode"])
    job_metadata["container_metadata"] = container_metadata
    job_metadata["outputs"] = outputs
    job_metadata["commit"] = job.study.commit
    job_metadata["local_run"] = True

    # Dump useful info in log directory
    log_dir = get_log_dir(job)
    ensure_overwritable(log_dir / "logs.txt", log_dir / "metadata.json")
    write_log_file(job, job_metadata, log_dir / "logs.txt")
    with open(log_dir / "metadata.json", "w") as f:
        json.dump(job_metadata, f, indent=2)

    # Copy logs to workspace
    workspace_dir = get_high_privacy_workspace(job.workspace)
    metadata_log_file = workspace_dir / METADATA_DIR / f"{job.action}.log"
    copy_file(log_dir / "logs.txt", metadata_log_file)
    log.info(f"Logs written to: {metadata_log_file}")

    # Extract outputs to workspace
    ensure_overwritable(*[workspace_dir / f for f in outputs.keys()])
    volume = volume_name(job)
    for filename in outputs.keys():
        log.info(f"Extracting output file: {filename}")
        docker.copy_from_volume(volume, filename, workspace_dir / filename)

    # Copy out logs and medium privacy files
    medium_privacy_dir = get_medium_privacy_workspace(job.workspace)
    if medium_privacy_dir:
        copy_file(
            workspace_dir / METADATA_DIR / f"{job.action}.log",
            medium_privacy_dir / METADATA_DIR / f"{job.action}.log",
        )
        for filename, privacy_level in outputs.items():
            if privacy_level == "moderately_sensitive":
                copy_file(workspace_dir / filename, medium_privacy_dir / filename)

        # this can be removed once osrelease is dead
        write_manifest_file(
            medium_privacy_dir,
            {"repo": job.study.git_repo_url, "workspace": job.workspace},
        )
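For context, here is a minimal sketch of the two file helpers this function leans on (`ensure_overwritable` and `copy_file`). The names come from the code above, but the bodies are assumptions, not the real implementations:
import os
import shutil
from pathlib import Path

def ensure_overwritable(*paths):
    # Assumption: clear any read-only permission bits so later writes succeed.
    for path in paths:
        path = Path(path)
        if path.exists():
            os.chmod(path, 0o644)

def copy_file(source, dest):
    # Assumption: create missing parent directories, then do a plain copy.
    dest = Path(dest)
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(source, dest)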
Example no. 2
def main(partial_job_ids, cleanup=False):
    jobs = get_jobs(partial_job_ids)
    for job in jobs:
        # If the job has previously been killed, we don't want to overwrite
        # its timestamps here
        if job.state in (State.PENDING, State.RUNNING):
            mark_job_as_failed(job, "Killed by admin")
        # All these docker commands are idempotent
        docker.kill(container_name(job))
        if cleanup:
            docker.delete_container(container_name(job))
            docker.delete_volume(volume_name(job))
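One plausible way to wire this up as a command-line entry point; the argparse flags below are illustrative, not taken from the source:
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Kill running jobs and optionally clean up their containers/volumes"
    )
    parser.add_argument("partial_job_ids", nargs="+", help="full or partial job IDs")
    parser.add_argument(
        "--cleanup", action="store_true", help="also delete containers and volumes"
    )
    args = parser.parse_args()
    main(args.partial_job_ids, cleanup=args.cleanup)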
Example no. 3
def find_matching_outputs(job):
    """
    Returns a dict mapping output filenames to their privacy level, plus a list
    of any patterns that had no matches at all
    """
    all_matches = docker.glob_volume_files(volume_name(job), job.output_spec.keys())
    unmatched_patterns = []
    outputs = {}
    for pattern, privacy_level in job.output_spec.items():
        filenames = all_matches[pattern]
        if not filenames:
            unmatched_patterns.append(pattern)
        for filename in filenames:
            outputs[filename] = privacy_level
    return outputs, unmatched_patterns
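Illustrative usage, assuming `job.output_spec` maps glob patterns to privacy levels (the spec values shown here are made up):
def warn_about_unmatched(job):
    # Suppose job.output_spec == {
    #     "output/*.csv": "highly_sensitive",
    #     "output/report.html": "moderately_sensitive",
    # }
    outputs, unmatched = find_matching_outputs(job)
    # outputs now maps each concrete filename to its privacy level, e.g.
    # {"output/model.csv": "highly_sensitive", ...}
    for pattern in unmatched:
        log.warning(f"No outputs matched pattern: {pattern}")
    return outputs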
Example no. 4
    def get_status(self, job):
        name = container_name(job)
        job_running = docker.container_inspect(
            name, "State.Running", none_if_not_exists=True
        )

        if job_running is None:
            # no container for this job found
            volume = volume_name(job)
            if docker.volume_exists(volume):
                return JobStatus(ExecutorState.PREPARED)
            else:
                return JobStatus(ExecutorState.UNKNOWN)

        elif job_running:
            return JobStatus(ExecutorState.EXECUTING)
        elif job.id in RESULTS:
            return JobStatus(ExecutorState.FINALIZED)
        else:  # container present but not running, i.e. finished
            return JobStatus(ExecutorState.EXECUTED)
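A rough sketch of the `docker.container_inspect` helper assumed above, shelling out to `docker container inspect` with a Go-template format string; the error handling is a guess at the real behaviour:
import json
import subprocess

def container_inspect(name, path, none_if_not_exists=False):
    # e.g. path="State.Running" becomes --format '{{json .State.Running}}'
    response = subprocess.run(
        ["docker", "container", "inspect", "--format", f"{{{{json .{path}}}}}", name],
        capture_output=True,
    )
    if response.returncode != 0:
        if none_if_not_exists and b"No such container" in response.stderr:
            return None
        raise RuntimeError(response.stderr.decode())
    return json.loads(response.stdout)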
Example no. 5
    def execute(self, job):
        current = self.get_status(job)
        if current.state != ExecutorState.PREPARED:
            return current

        try:
            docker.run(
                container_name(job),
                [job.image] + job.args,
                volume=(volume_name(job), "/workspace"),
                env=job.env,
                allow_network_access=job.allow_database_access,
                label=LABEL,
                labels=get_job_labels(job),
            )
        except Exception as exc:
            return JobStatus(
                ExecutorState.ERROR, f"Failed to start docker container: {exc}"
            )

        return JobStatus(ExecutorState.EXECUTING)
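Putting the previous two examples together, a hypothetical polling loop that drives a job from PREPARED through EXECUTING; the loop and its parameters are illustrative, not part of the source:
import time

def wait_for_completion(api, job, poll_interval=1.0):
    # execute() returns the current status unchanged unless the job is
    # in the PREPARED state, so this is safe to call repeatedly.
    status = api.execute(job)
    while status.state == ExecutorState.EXECUTING:
        time.sleep(poll_interval)
        status = api.get_status(job)
    return status  # EXECUTED, FINALIZED, ERROR or UNKNOWN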
Example no. 6
def prepare_job(job):
    """Creates a volume and populates it with the repo and input files."""
    workspace_dir = get_high_privacy_workspace(job.workspace)

    volume = volume_name(job)
    docker.create_volume(volume, get_job_labels(job))

    # `docker cp` can't create parent directories for us so we make sure all
    # these directories get created when we copy in the code
    extra_dirs = set(Path(filename).parent for filename in job.inputs)

    try:
        if job.study.git_repo_url and job.study.commit:
            copy_git_commit_to_volume(
                volume, job.study.git_repo_url, job.study.commit, extra_dirs
            )
        else:
            # We only encounter jobs without a repo or commit when using the
            # "local_run" command to execute uncommitted local code
            copy_local_workspace_to_volume(volume, workspace_dir, extra_dirs)
    except subprocess.CalledProcessError as exc:
        raise LocalDockerError(
            f"Could not checkout commit {job.study.commit} from {job.study.git_repo_url}"
        ) from exc

    for filename in job.inputs:
        log.info(f"Copying input file: {filename}")
        if not (workspace_dir / filename).exists():
            raise LocalDockerError(
                f"The file {filename} doesn't exist in workspace {job.workspace} as requested for job {job.id}"
            )
        docker.copy_to_volume(volume, workspace_dir / filename, filename)

    # Hack: see `get_unmatched_outputs`. For some reason this requires a
    # non-empty file, so copying `os.devnull` didn't work.
    some_non_empty_file = Path(__file__)
    docker.copy_to_volume(volume, some_non_empty_file, TIMESTAMP_REFERENCE_FILE)
    return volume
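As an aside, `docker cp` cannot write directly into a volume, so `copy_to_volume` presumably stages the volume in a throwaway container. A hedged sketch follows; the container image and mount point are assumptions:
import subprocess

def copy_to_volume(volume, source, dest):
    # Create (but don't start) a container with the volume mounted...
    subprocess.run(
        ["docker", "container", "create", "--name", f"{volume}-copy",
         "--volume", f"{volume}:/workspace", "busybox"],
        check=True, capture_output=True,
    )
    try:
        # ...then `docker cp` into it; this works on stopped containers.
        # Note `docker cp` won't create parent directories, which matches
        # the caveat in the example above.
        subprocess.run(
            ["docker", "cp", str(source), f"{volume}-copy:/workspace/{dest}"],
            check=True, capture_output=True,
        )
    finally:
        subprocess.run(["docker", "rm", f"{volume}-copy"], capture_output=True)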
Example no. 7
def main():
    print(
        "== DANGER ZONE ==\n"
        "\n"
        "This will kill all running jobs and reset them to the PENDING state, ready\n"
        "to be restarted following a reboot.\n"
        "\n"
        "It should only be run when the job-runner service has been stopped."
        "\n")
    confirm = input("Are you sure you want to continue? (y/N) ")
    assert confirm.strip().lower() == "y"
    # Reset all running jobs to pending
    update_where(
        Job,
        {"state": State.PENDING, "started_at": None},
        state=State.RUNNING,
    )
    # Make sure all containers and volumes are removed so the jobs can be
    # started afresh after the reboot
    for job in find_where(Job, state=State.PENDING):
        docker.kill(container_name(job))
        docker.delete_container(container_name(job))
        docker.delete_volume(volume_name(job))
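Finally, a guess at the shape of the `update_where` helper used above, assuming a single SQLite table whose columns match the keyword arguments; the real persistence layer may well differ:
import sqlite3

def update_where(model, update_dict, **where):
    # Assumption: enum values (e.g. State.PENDING) are stored by their .value.
    def adapt(value):
        return getattr(value, "value", value)

    set_clause = ", ".join(f"{col} = ?" for col in update_dict)
    where_clause = " AND ".join(f"{col} = ?" for col in where)
    params = [adapt(v) for v in update_dict.values()] + [adapt(v) for v in where.values()]
    # Assumption: the table is named after the model class, e.g. Job -> "job".
    with sqlite3.connect("jobs.db") as conn:
        conn.execute(
            f"UPDATE {model.__name__.lower()} SET {set_clause} WHERE {where_clause}",
            params,
        )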