def _get_failed_task(cls, analysis: dxpy.DXAnalysis) -> dict:
    """
    Find the causal failure within an execution tree and get the logs.
    """
    query = {
        "project": dxpy.WORKSPACE_ID,
        "state": "failed",
        "describe": {"stateTransitions": True},
        "include_subjobs": True,
        "root_execution": analysis.get_id()
    }
    # The execution that entered the "failed" state earliest is taken to be
    # the root cause; later failures are assumed to be downstream effects.
    cause = None
    cause_ts = None
    for execution_result in dxpy.find_executions(**query):
        for st in execution_result["describe"].get("stateTransitions", []):
            if st["newState"] == "failed":
                ts = st["setAt"]
                if cause is None or ts < cause_ts:
                    cause = execution_result
                    cause_ts = ts
    if cause:
        stdout, stderr = cls._get_logs(cause["id"])
        return {
            "failed_task": cause["id"],
            "failed_task_stdout": stdout,
            "failed_task_stderr": stderr
        }
    else:
        return {
            "msg": f"Analysis {analysis.get_id()} failed but the cause could "
                   f"not be determined"
        }
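# A minimal usage sketch. `Runner` is a hypothetical name for the class that
# hosts _get_failed_task (the enclosing class is not shown in this excerpt),
# and "analysis-xxxx" is a placeholder ID for a failed analysis.
analysis = dxpy.DXAnalysis("analysis-xxxx")
failure = Runner._get_failed_task(analysis)
if "failed_task" in failure:
    print("Root cause:", failure["failed_task"])
    print(failure["failed_task_stderr"])
else:
    print(failure["msg"])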
def main():
    args = get_args()
    # applets = list(dxpy.find_data_objects(classname='applet', name='dbgap_sra_to_fastq*',
    #                                       name_mode='glob', project=args.project,
    #                                       return_handler=True))
    # assert applets
    srrs = args.SRRs or args.infile
    fieldnames = [
        'SRR', 'sra_size', 'sra_md5', 'fastq_id', 'fastq_alias',
        'fastq_size', 'fastq_name', 'fastq_md5'
    ]
    writer = csv.DictWriter(args.outfile, fieldnames, delimiter='\t')
    writer.writeheader()
    # jobs = []
    # for applet in applets:
    #     jobs.extend(list(dxpy.find_executions(executable=applet, describe=True,
    #                                           first_page_size=1000)))
    for row in srrs:
        # Skip comment lines in the SRR list.
        if row.startswith('#'):
            continue
        srr = row.strip()
        srr_jobs = [
            j.get('describe')
            for j in dxpy.find_executions(describe=True,
                                          name="%s_sra_to_fastq" % srr)
        ]
        # srr_jobs = [j['describe'] for j in jobs
        #             if j['describe'].get('state') == 'done'
        #             and srr in j['describe']['input']['SRR']]
        if not srr_jobs:
            writer.writerow({'SRR': "%s: not downloaded" % srr})
        elif not any(job.get('state') == 'done' for job in srr_jobs):
            # No successful job for this SRR: report the state of each attempt.
            for job in srr_jobs:
                writer.writerow({
                    'SRR': "%s: job %s %s" % (srr, job.get('id'), job.get('state'))
                })
        else:
            # One output row per fastq file of each successful job.
            for job in [j for j in srr_jobs if j.get('state') == 'done']:
                outrow = {
                    'SRR': job['input'].get('SRR'),
                    'sra_size': job['output'].get('sra_size'),
                    'sra_md5': job['output'].get('sra_md5')
                }
                for i, fastq in enumerate(job['output'].get('fastq')):
                    fh = dxpy.DXFile(fastq)
                    try:
                        file_desc = fh.describe()
                        file_size = file_desc.get('size')
                        file_name = file_desc.get('name')
                    except dxpy.exceptions.ResourceNotFound:
                        # The fastq has since been removed from the project.
                        file_size = 'deleted'
                        file_name = 'deleted'
                    outrow.update({
                        'fastq_id': fh.get_id(),
                        'fastq_alias': ":".join(["dnanexus", fh.get_id()]),
                        'fastq_size': file_size,
                        # 'fastq_name': job['output'].get('fastq_filenames')[i],
                        'fastq_name': file_name,
                        'fastq_md5': job['output'].get('fastq_md5s')[i]
                    })
                    writer.writerow(outrow)
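# Standard entry-point guard so the report can be run as a script. get_args()
# is assumed to be defined elsewhere in this file and to supply the SRRs,
# infile, outfile, and project attributes used above.
if __name__ == '__main__':
    main()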
created_before = "-" + args.created_before  # a leading "-" means "this long ago" in dxpy time arguments
# if not args.created_after:
#     args.created_after = 0    # default is everything since the Unix epoch
# if not args.created_before:
#     args.created_before = -1  # default is everything up to 1 ms ago
project_job_ids = []
if project_ids:
    for project_id in project_ids:
        if args.states:
            for state in args.states:
                project_job_ids.extend([
                    e["id"] for e in dxpy.find_executions(
                        project=project_id,
                        state=state,
                        created_after=created_after,
                        created_before=created_before)
                ])
        else:
            project_job_ids.extend([
                e["id"] for e in dxpy.find_executions(
                    project=project_id,
                    created_after=created_after,
                    created_before=created_before)
            ])

# Deduplicate: the same execution can be reached through more than one input.
execution_ids_to_describe = list(
    set(analysis_ids + project_job_ids + job_ids))
print("Reading {} total executions...".format(
    len(execution_ids_to_describe)))
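# A minimal sketch of the describe step that presumably follows the print
# above: fetching each execution's describe dict one call at a time.
# dxpy.describe() accepts a job- or analysis- ID; any batching or caching
# strategy the real script uses is not shown here.
for execution_id in execution_ids_to_describe:
    desc = dxpy.describe(execution_id)
    print(desc["id"], desc.get("state"))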