Example 1
# Assumed module-level setup for this excerpt: the DNAnexus SDK imported as
# ``dx`` and a standard logger bound to ``log``.
import logging
from collections import defaultdict

import dxpy as dx

log = logging.getLogger(__name__)


def get_jobs_per_project(projects):
    """
    Return jobs grouped by project and job state.

    Args:
        projects (list): List of dxpy project handlers (``describe()`` is
            called on each one).

    Returns:
        tuple: A dict mapping (project_name, project_id) -> job state ->
            list of job names, and a list of project names with no jobs
            created in the last 24 hours.
    """

    project2jobs = defaultdict(lambda: defaultdict(list))
    project_no_run = []

    for project in projects:
        # Describe each project once instead of once per field
        project_desc = project.describe()
        project_id = project_desc["id"]
        project_name = project_desc["name"]

        log.info(f'Getting jobs for {project_name}')
        jobs = list(dx.find_jobs(project=project_id, created_after="-24h"))

        if jobs:
            for job in jobs:
                # Describe each job once and read both fields from the result
                job_desc = dx.DXJob(job["id"]).describe()
                job_name = job_desc["name"]
                job_state = job_desc["state"]
                project2jobs[(project_name,
                              project_id)][job_state].append(job_name)
        else:
            project_no_run.append(project_name)

    return project2jobs, project_no_run
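
A minimal usage sketch for the helper above, assuming a logged-in dxpy session; the project IDs and the reporting loop are illustrative only:

# Hypothetical caller: summarise the last 24h of jobs for a few projects.
projects = [dx.DXProject(proj_id)
            for proj_id in ("project-xxxx", "project-yyyy")]  # placeholder IDs

project2jobs, project_no_run = get_jobs_per_project(projects)

for (name, proj_id), state2jobs in project2jobs.items():
    for state, job_names in state2jobs.items():
        log.info("%s (%s): %d jobs in state %s",
                 name, proj_id, len(job_names), state)

if project_no_run:
    log.info("No jobs in the last 24h for: %s", ", ".join(project_no_run))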
Example 2
# Assumed module-level setup for this excerpt: ``set_dx_authorization`` and
# ``dx_to_wes_state`` are helpers defined elsewhere in the module.
import datetime
import os
import subprocess

import dxpy

# A thread pool is assumed here because the worker passed to ``map`` below is
# a closure, which a process pool could not pickle.
from multiprocessing.dummy import Pool


def dnanexus_workflows_get(event, context):
    """Handle GET /workflows/{workflow_id}.

    Args:
        event (dict): has a key "workflow_id" that was taken from the URL.
            This is the id that was generated when the POST request was made,
            and the dnanexus executions we care about should be tagged with it.
        context: the AWS Lambda context object; its request id is used when
            building the "not found" error response.

    Returns:
        dict: a workflow log response, or an error dict if no execution is
            tagged with the given workflow id.
    """

    auth_header = set_dx_authorization(event["headers"]["Authorization"])
    wes_workflow_id = event["workflow_id"]

    # First try to find the dxWDL job that's the parent job of everything
    try:
        base_job = list(
            dxpy.find_jobs(properties={"wes_id": wes_workflow_id},
                           name="WES dxWDL Runner",
                           return_handler=True))[0]
    except IndexError:
        error_dict = {
            "errorType": "NotFound",
            "httpStatus": "404",
            "requestId": context.aws_request_id,
            "message": "Workflow {} was not found".format(wes_workflow_id)
        }
        return error_dict

    child_jobs = list(
        dxpy.find_jobs(root_execution=base_job.get_id(), return_handler=True))
    child_job_ids = [j.get_id() for j in child_jobs]

    response = {
        "state": "",
        "workflow_id": "",
        "workflow_log": {
            "start_time": "",
            "end_time": "",
            "stdout": "",
            "stderr": "",
            "exit_code": -1
        },
        "task_logs": []
    }

    dx_state = base_job.describe()["state"]
    wes_state = dx_to_wes_state(dx_state)
    response["state"] = wes_state

    def get_logs_for_job(dx_job_id):
        """Retrieve the logs for a single DXJob via ``dx watch``."""

        # The bundled dx CLI is expected at bin/dx; put its directory on PATH
        # and its parent package on PYTHONPATH so the subprocess can run it.
        dx_exe_path = os.path.abspath("bin/dx")
        cmd = [
            "dx", "watch", "-q", "--no-timestamps", "--get-streams",
            "--no-follow", dx_job_id
        ]
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env={
                "DX_SECURITY_CONTEXT": auth_header,
                "PYTHONPATH": ':'.join([
                    os.environ.get("PYTHONPATH", ""),
                    os.path.dirname(os.path.dirname(dx_exe_path))
                ]),
                "PATH": ':'.join([
                    os.environ["PATH"],
                    os.path.dirname(dx_exe_path)
                ])
            })
        # stderr is folded into stdout above, so only stdout is meaningful;
        # decode so the log text is JSON-serialisable in the response.
        stdout, _ = proc.communicate()
        return stdout.decode("utf-8", errors="replace")

    # Fetch logs for all child jobs concurrently
    pool = Pool(8)

    jobs_to_logs = dict(
        zip(child_job_ids, pool.map(get_logs_for_job, child_job_ids)))

    for job_id in child_job_ids:
        dx_job = dxpy.DXJob(job_id)
        job_desc = dx_job.describe()

        task_name = job_desc["executableName"]
        time_fmt = "{:%Y-%m-%dT%H:%M:%S}"
        # Timestamps are epoch milliseconds and may be absent if the job has
        # not started or finished yet.
        try:
            start_time = time_fmt.format(
                datetime.datetime.fromtimestamp(job_desc["startedRunning"] /
                                                1000))
        except (KeyError, TypeError):
            start_time = ""
        try:
            end_time = time_fmt.format(
                datetime.datetime.fromtimestamp(job_desc["stoppedRunning"] /
                                                1000))
        except (KeyError, TypeError):
            end_time = ""

        log = jobs_to_logs.get(job_id, "")

        response["task_logs"].append({
            "name": task_name + ":" + job_id,
            "start_time": start_time,
            "end_time": end_time,
            "stdout": log,
            "stderr": "",
            "exit_code": -1
        })

    return response
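
A minimal invocation sketch for the handler above, assuming an API-Gateway-style event; FakeContext, the token, and the workflow id are placeholders for illustration:

# Hypothetical local invocation of the handler.
class FakeContext:
    """Stand-in for the AWS Lambda context; only aws_request_id is read."""
    aws_request_id = "local-test-request"

event = {
    "headers": {"Authorization": "Bearer <dnanexus-api-token>"},  # placeholder
    "workflow_id": "<wes-workflow-id>",  # id returned by the POST /workflows call
}

result = dnanexus_workflows_get(event, FakeContext())
if "errorType" in result:
    print(result["message"])
else:
    print(result["state"], len(result["task_logs"]), "task logs")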