def main():
    """Kill the running jobs or the leader process of a Toil workflow.

    A leading ``file:`` scheme is stripped from the job store locator first.
    If the remaining locator still contains ``:`` the job store is resumed and
    every job ID the batch system reports as issued is killed. Otherwise the
    locator is treated as a local directory and the PID recorded in its
    ``pid.log`` file is sent ``SIGKILL``.

    Raises:
        OSError: if the recorded process could not be signalled. The failure
            is logged before the exception is re-raised.
    """
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    # Drop an explicit 'file:' scheme so what remains is a plain path.
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            # killBatchJobs expects a list of job IDs, so wrap the single ID.
            batchSystem.killBatchJobs([jobID])
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated.", pid2kill)
        except OSError:
            logger.error("Toil process %s could not be terminated.", pid2kill)
            raise
def main():
    """Run one job from an existing job store in the current process.

    Parses the job store locator and a job ID from the command line, resumes
    the job store, optionally prints the job store contents for
    ``--printJobInfo``, and then hands the job to ``workerScript`` with output
    redirection to a log file turned off.
    """
    arg_parser = parser_with_common_options(jobstore_option=True)
    arg_parser.add_argument(
        "jobID",
        nargs=1,
        help="The job store id of a job within the provided jobstore to run by itself.",
    )
    arg_parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help="Return information about this job to the user including preceding jobs, "
             "inputs, outputs, and runtime from the last known run.",
    )
    args = arg_parser.parse_args()
    set_logging_from_options(args)

    config = Config()
    config.setOptions(args)
    store = Toil.resumeJobStore(config.jobStore)

    job_info_request = args.printJobInfo
    if job_info_request:
        printContentsOfJobStore(jobStorePath=config.jobStore, nameOfJob=job_info_request)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    # nargs=1 wraps the positional in a one-element list.
    (job_id,) = args.jobID[:1]
    logger.debug(f"Running the following job locally: {job_id}")
    workerScript(store, config, job_id, job_id, redirectOutputToLogFile=False)
    logger.debug(f"Finished running: {job_id}")
def main():
    """Kill the jobs that the batch system reports as issued for a workflow.

    Resumes the job store given on the command line, creates a batch system
    from the configuration stored in it, and kills each job ID returned by
    ``getIssuedBatchJobIDs()``.
    """
    parser = getBasicOptionParser()
    parser.add_argument(
        "jobStore", type=str,
        help="The location of the job store used by the workflow whose jobs should "
             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info(
        "Starting routine to kill running jobs in the toil workflow: %s",
        config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config)  #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs():  #Just in case we do it again.
        # killBatchJobs expects a list of job IDs, so wrap the single ID.
        batchSystem.killBatchJobs([jobID])
    logger.info("All jobs SHOULD have been killed")
def main():
    """Delete the job store named on the command line.

    Looks the job store up with ``Toil.getJobStore`` and calls ``destroy()``
    on it, logging before and after.
    """
    arg_parser = getBasicOptionParser()
    locator_help = "The location of the job store to delete. " + jobStoreLocatorHelp
    arg_parser.add_argument("jobStore", type=str, help=locator_help)
    arg_parser.add_argument("--version", action='version', version=version)

    config = Config()
    parsed_options = parseBasicOptions(arg_parser)
    config.setOptions(parsed_options)

    logger.info("Attempting to delete the job store")
    store = Toil.getJobStore(config.jobStore)
    store.destroy()
    logger.info("Successfully deleted the job store")
def main():
    """
    Report statistics for a workflow (use with the --stats option to toil).

    Parses and checks the command-line options, resumes the job store,
    gathers the raw stats from it, collates them and reports the result.
    """
    arg_parser = getBasicOptionParser()
    initializeOptions(arg_parser)
    parsed = parseBasicOptions(arg_parser)
    checkOptions(parsed, arg_parser)

    config = Config()
    config.setOptions(parsed)
    store = Toil.resumeJobStore(config.jobStore)

    # Collation uses the configuration stored in the job store itself.
    raw_stats = getStats(store)
    collated = processData(store.config, raw_stats)
    reportData(collated, parsed)
def main():
    """Fetch or list files held in an existing job store.

    With ``--fetch`` the named files or glob patterns are copied locally.
    Failing that, ``--fetchEntireJobStore`` sets the pattern to ``*`` and
    copies everything. Independently of either, ``--listFilesInJobStore``
    prints the contents of the job store.
    """
    parser = getBasicOptionParser()
    parser.add_argument(
        "jobStore", type=str,
        help="The location of the job store used by the workflow." + jobStoreLocatorHelp)
    parser.add_argument("--localFilePath", nargs=1,
                        help="Location to which to copy job store files.")
    # Fixed: the first two literals used to join as "copied locally.Use".
    parser.add_argument("--fetch", nargs="+",
                        help="List of job-store files to be copied locally. "
                             "Use either explicit names (i.e. 'data.txt'), or "
                             "specify glob patterns (i.e. '*.txt')")
    # NOTE(review): the next three options have no action="store_true", so
    # argparse requires a value for each. Confirm that is intended.
    parser.add_argument(
        "--listFilesInJobStore",
        help="Prints a list of the current files in the jobStore.")
    parser.add_argument(
        "--fetchEntireJobStore",
        help="Copy all job store files into a local directory.")
    parser.add_argument(
        "--useSymlinks",
        help="Creates symlink 'shortcuts' of files in the localFilePath"
             " instead of hardlinking or copying, where possible.  If this is"
             " not possible, it will copy the files (shutil.copyfile()).")
    parser.add_argument("--version", action='version', version=version)

    # Load the jobStore
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    logger.debug("Connected to job store: %s", config.jobStore)

    if options.fetch:
        # Copy only the listed files locally
        logger.debug("Fetching local files: %s", options.fetch)
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    elif options.fetchEntireJobStore:
        # Copy all jobStore files locally
        logger.debug("Fetching all local files.")
        options.fetch = "*"
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    if options.listFilesInJobStore:
        # Log filenames and create a file containing these names in cwd
        printContentsOfJobStore(jobStorePath=options.jobStore)
def testMultipleJobsPerWorkerStats(self):
    """
    Checks that every job reports its data back when multiple jobs
    are run on a single worker.
    """
    job_store_path = self._getTestJobStorePath()
    options = Job.Runner.getDefaultOptions(job_store_path)
    # Keep the job store after the run so its stats can be read back.
    options.stats = True
    options.clean = 'never'
    Job.Runner.startToil(RunTwoJobsPerWorker(), options)

    config = Config()
    config.setOptions(options)
    store = Toil.resumeJobStore(config.jobStore)
    collatedStats = processData(store.config, getStats(store))

    job_type_count = len(collatedStats.job_types)
    self.assertTrue(job_type_count == 2, "Some jobs are not represented in the stats")
def main():
    """
    This is a Toil pipeline to transfer TCGA data into an S3 Bucket

    Data is pulled down with Genetorrent and transferred to S3 via S3AM.
    """
    # The docstring above doubles as the --help description
    # (description=main.__doc__), so implementation notes live in comments.

    # Define Parser object and add to toil
    def existing_file(fname):
        """
        Argparse type for an existing file

        Returns *fname* unchanged when it names an existing regular file.
        Raises argparse.ArgumentTypeError otherwise, so that argparse shows
        the "Invalid file" message to the user. With a plain ValueError
        argparse substitutes its own generic message instead.
        """
        if not os.path.isfile(fname):
            raise argparse.ArgumentTypeError("Invalid file: " + str(fname))
        return fname

    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '--sudo', dest='sudo', default=None, action='store_true',
        help='Docker usually needs sudo to execute locally, but not when running Mesos or when '
             'the user is a member of a Docker group.')
    Job.Runner.addToilOptions(parser)
    parser.add_argument('datafiles', nargs='+', help='FASTA input', type=existing_file)
    args = parser.parse_args()

    assert args.jobStore is not None
    config = Config()
    config.setOptions(args)

    # Store inputs from argparse
    inputs = {'sudo': args.sudo}
    datafiles = [os.path.abspath(d) for d in args.datafiles]

    # Start Pipeline
    # NOTE(review): the workflow is started with default options for
    # "./toilWorkflow" rather than the parsed command-line args. Confirm
    # that ignoring args.jobStore here is intended.
    options = Job.Runner.getDefaultOptions("./toilWorkflow")
    Job.Runner.startToil(Job.wrapJobFn(start_batch, datafiles, inputs), options)
def main():
    """Kill the jobs that the batch system reports as issued for a workflow.

    Resumes the job store given on the command line, creates a batch system
    from the configuration stored in it, and kills each job ID returned by
    ``getIssuedBatchJobIDs()``.
    """
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config)  #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs():  #Just in case we do it again.
        # killBatchJobs expects a list of job IDs, so wrap the single ID.
        batchSystem.killBatchJobs([jobID])
    logger.info("All jobs SHOULD have been killed")
def main():
    """Delete the job store named on the command line, logging the outcome.

    A missing job store (``NoSuchJobStoreException``) is logged and
    swallowed. Any other failure is logged and re-raised.
    """
    arg_parser = getBasicOptionParser()
    arg_parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store to delete. " + jobStoreLocatorHelp,
    )
    arg_parser.add_argument("--version", action='version', version=version)

    config = Config()
    parsed_options = parseBasicOptions(arg_parser)
    config.setOptions(parsed_options)

    try:
        store = Toil.getJobStore(config.jobStore)
        store.resume()
        store.destroy()
        logger.info("Successfully deleted the job store: %s", config.jobStore)
    except NoSuchJobStoreException:
        logger.info("Failed to delete the job store: %s is non-existent", config.jobStore)
    except BaseException:
        # Catches everything, as a bare except would, and always re-raises.
        logger.info("Failed to delete the job store: %s", config.jobStore)
        raise
def main() -> None:
    """Report statistics for a workflow (use with the --stats option to toil).

    Each comma-separated entry of ``options.categories`` is checked against
    ``category_choices`` and the list is then normalised to stripped,
    lower-cased names. After that the job store is resumed and its stats are
    collated and reported.

    Raises:
        ValueError: if a requested category is not in ``category_choices``.
    """
    parser = parser_with_common_options()
    add_stats_options(parser)
    options = parser.parse_args()

    requested = options.categories.split(",")
    # Validate every entry before normalising any of them.
    for entry in requested:
        if entry.strip() not in category_choices:
            raise ValueError(f'{entry} not in {category_choices}!')
    options.categories = [entry.strip().lower() for entry in requested]

    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    store = Toil.resumeJobStore(config.jobStore)

    raw_stats = getStats(store)
    collated = processData(store.config, raw_stats)
    reportData(collated, options)
def main():
    """Run one job from an existing job store in the current process.

    Parses the job store locator and a job ID, resumes the job store,
    optionally prints the job store contents for ``--printJobInfo``, and then
    hands the job to ``workerScript`` with output redirection to a log file
    turned off.
    """
    arg_parser = getBasicOptionParser()
    arg_parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." + jobStoreLocatorHelp,
    )
    arg_parser.add_argument(
        "jobID",
        nargs=1,
        help="The job store id of a job "
             "within the provided jobstore to run by itself.",
    )
    arg_parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help="Return information about this job to the user"
             " including preceding jobs, inputs, outputs, and runtime"
             " from the last known run.",
    )
    arg_parser.add_argument("--version", action='version', version=version)

    # Parse options
    args = parseBasicOptions(arg_parser)
    config = Config()
    config.setOptions(args)

    # Load the job store
    store = Toil.resumeJobStore(config.jobStore)

    job_info_request = args.printJobInfo
    if job_info_request:
        printContentsOfJobStore(jobStorePath=args.jobStore, nameOfJob=job_info_request)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    # Run the job locally; nargs=1 wraps the positional in a one-element list.
    (job_id,) = args.jobID[:1]
    logger.debug("Going to run the following job locally: %s", job_id)
    workerScript(store, config, job_id, job_id, redirectOutputToLogFile=False)
    logger.debug("Ran the following job locally: %s", job_id)
def main() -> None:
    """Kill the leader process or the running jobs of a Toil workflow.

    For a ``file`` job store the PID recorded in ``pid.log`` under the job
    store's ``sharedFilesDir`` is sent ``SIGTERM``. For any other job store
    type every job ID the batch system reports as issued is killed.

    Raises:
        OSError: if the recorded process could not be signalled. The failure
            is logged before the exception is re-raised.
    """
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    store_kind, _ = Toil.parseLocator(config.jobStore)
    # Both code paths start by attaching to the job store.
    job_store = Toil.resumeJobStore(config.jobStore)

    if store_kind == 'file':
        # Kill the pid recorded in the jobstore.
        # TODO: We assume this is a local PID.
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log_path = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log_path) as handle:
            recorded_pid = handle.read().strip()

        try:
            os.kill(int(recorded_pid), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated.", recorded_pid)
        except OSError:
            logger.error("Toil process %s could not be terminated.", recorded_pid)
            raise
        return

    # Remote (aws/google) jobstore; use the old (broken?) method
    logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
    # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1

    # There's no guarantee that the batch system in use can enumerate
    # running jobs belonging to the job store we've attached to. And
    # moreover we don't even bother trying to kill the leader at its
    # recorded PID, even if it is a local process.

    # Should automatically kill existing jobs, so we're good.
    batch_system = Toil.createBatchSystem(job_store.config)
    # Just in case we do it again.
    for issued_id in batch_system.getIssuedBatchJobIDs():
        batch_system.killBatchJobs([issued_id])
    logger.info("All jobs SHOULD have been killed")
def main():
    """Reports the state of the toil.

    Resumes the job store given on the command line, loads its root job and
    prints to stderr how many active jobs, parent jobs with children and
    totally failed jobs (no retries remaining) it contains. With --verbose
    the log of each failed job is streamed through the logger.

    Exits with status 0 if the root job is absent, and with status 1 if
    --failIfNotComplete is given and any jobs remain.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    # Adjacent literals replace a backslash continuation inside the string,
    # which embedded the source indentation in the help text.
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of "
                             "jobs that failed. default=%(default)s",
                        default=False)

    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # NOTE(review): jobStore is a required positional, so parsing has
    # presumably already failed by the time this runs with no arguments.
    # Confirm whether this check should precede parseBasicOptions.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warn is a deprecated alias; use logger.warning.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Reports the state of the toil.

    Resumes the job store given on the command line, loads its root job and
    prints to stderr how many active jobs, parent jobs with children and
    totally failed jobs (no retries remaining) it contains. With --verbose
    the log of each failed job is streamed through the logger.

    Exits with status 0 if the root job is absent, and with status 1 if
    --failIfNotComplete is given and any jobs remain.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    # Adjacent literals replace a backslash continuation inside the string,
    # which embedded the source indentation in the help text.
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of "
                             "jobs that failed. default=%(default)s",
                        default=False)

    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # NOTE(review): jobStore is a required positional, so parsing has
    # presumably already failed by the time this runs with no arguments.
    # Confirm whether this check should precede parseBasicOptions.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobGraph we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [job for job in totalJobs if job.remainingRetryCount == 0]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    # logger.warn is a deprecated alias; use logger.warning.
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)

    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
            options.failIfNotComplete:
        sys.exit(1)
def main():
    """Reports the state of the toil.

    Resumes the job store given on the command line, walks the job graph
    from the root job and logs counts of unfinished jobs, jobs with
    children, jobs with services, services, and jobs that have a log file.
    With --verbose the logs of those jobs are printed.

    Exits with status 0 if the root job is absent, and with status 1 if
    --failIfNotComplete is given and any jobs were found.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore", type=str,
        help="The location of a job store that holds the information about the "
             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    # Adjacent literals replace a backslash continuation inside the string,
    # which embedded the source indentation in the help text.
    parser.add_argument(
        "--verbose", dest="verbose", action="store_true",
        help="Print loads of information, particularly all the log files of "
             "jobs that failed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--failIfNotComplete", dest="failIfNotComplete", action="store_true",
        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # NOTE(review): jobStore is a required positional, so parsing has
    # presumably already failed by the time this runs with no arguments.
    # Confirm whether this check should precede parseBasicOptions.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        """Return every job reachable from jobGraph that exists in the job store.

        Successors are followed recursively. Service jobs are recorded but
        not descended into.
        """
        foundJobStoreIDs = set()
        totalJobs = []

        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successor in jobs:
                    successorJobStoreID = successor.jobStoreID
                    if (successorJobStoreID not in foundJobStoreIDs
                            and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for service in jobs:
                    serviceJobStoreID = service.jobStoreID
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))

        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        # NOTE(review): `logJobStoreFileID != 0` holds for None as well as
        # for any non-zero ID. Confirm whether `is None` was intended.
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID != 0 and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info('There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
                'and %i totally failed jobs currently in %s.' %
                (len(totalJobs), len(hasChildren), len(hasServices), len(services), len(failedJobs), config.jobStore))

    if currentlyRunning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunning), ' \n'.join(map(str, currentlyRunning)))

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join((str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        # when the workflow is complete, all jobs will have been removed from job store
        # sys.exit replaces the builtin exit(), which the site module supplies.
        sys.exit(1)
def main():
    """Reports the state of a Toil workflow.

    Resumes the job store given on the command line and gathers either the
    jobs named with --jobs or, without that option, every job found by
    traversing the graph from the root job. The option flags then select
    which reports are printed for those jobs.

    Exits with status 1 if --failIfNotComplete is given and any jobs were
    gathered.
    """
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument("--failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)

    parser.add_argument("--noAggStats", dest="stats", action="store_false",
                        help="Do not print overall, aggregate status of workflow.",
                        default=True)

    parser.add_argument("--printDot", action="store_true",
                        help="Print dot formatted description of the graph. If using --jobs will "
                             "restrict to subgraph including only those jobs. default=%(default)s",
                        default=False)

    parser.add_argument("--jobs", nargs='+',
                        help="Restrict reporting to the following jobs (allows subsetting of the report).",
                        default=None)

    parser.add_argument("--printPerJobStats", action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument("--printLogs", action="store_true",
                        help="Print the log files of jobs (if they exist). default=%(default)s",
                        default=False)

    parser.add_argument("--printChildren", action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    parser.add_argument("--version", action='version', version=version)

    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # NOTE(review): jobStore is a required positional, so parsing has
    # presumably already failed by the time this runs with no arguments.
    # Confirm whether this check should precede parseBasicOptions.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    ##########################################
    # Gather the jobs to report
    ##########################################

    # Gather all jobs in the workflow in jobsToReport
    if options.jobs is None:
        rootJob = fetchRootJob(jobStore)
        logger.info('Traversing the job graph gathering jobs. This may take a couple of minutes.')
        jobsToReport = traverseJobGraph(rootJob, jobStore)

    # Only gather jobs specified in options.jobs
    else:
        jobsToReport = fetchUserJobs(jobStore, jobs=options.jobs)

    ##########################################
    # Report on the jobs
    ##########################################

    jobStats = report_on_jobs(jobsToReport, jobStore, options)

    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        printAggregateJobStats(jobsToReport, properties, childNumber)
    if options.printLogs:
        printJobLog(jobsToReport, jobStore)
    if options.printChildren:
        printJobChildren(jobsToReport)
    if options.printDot:
        print_dot_chart(jobsToReport, jobStore_name=config.jobStore)
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
               len(hasServices), len(services), len(hasLogFile), config.jobStore))

    if len(jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        # sys.exit replaces the builtin exit(), which the site module supplies.
        sys.exit(1)
def main():
    """Reports the state of the toil.

    Resumes the job store given on the command line, walks the job graph
    from the root job and logs counts of unfinished jobs, jobs with
    children, jobs with services, services, and jobs that have a log file.
    With --verbose the logs of those jobs are printed.

    Exits with status 0 if the root job is absent, and with status 1 if
    --failIfNotComplete is given and any jobs were found.
    """
    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    # Adjacent literals replace a backslash continuation inside the string,
    # which embedded the source indentation in the help text.
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                        help="Print loads of information, particularly all the log files of "
                             "jobs that failed. default=%(default)s",
                        default=False)

    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    # NOTE(review): jobStore is a required positional, so parsing has
    # presumably already failed by the time this runs with no arguments.
    # Confirm whether this check should precede parseBasicOptions.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)

    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        """Return every job reachable from jobGraph that exists in the job store.

        Successors are followed recursively. Service jobs are recorded but
        not descended into.
        """
        foundJobStoreIDs = set()
        totalJobs = []

        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in [x.jobStoreID for x in jobs]:
                    if (successorJobStoreID not in foundJobStoreIDs
                            and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in [x.jobStoreID for x in jobs]:
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))

        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        # NOTE(review): `logJobStoreFileID != 0` holds for None as well as
        # for any non-zero ID. Confirm whether `is None` was intended.
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID != 0 and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info('There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
                'and %i totally failed jobs currently in %s.' %
                (len(totalJobs), len(hasChildren), len(hasServices), len(services), len(failedJobs), config.jobStore))

    if currentlyRunning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunning), ' \n'.join(map(str, currentlyRunning)))

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join((str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        # when the workflow is complete, all jobs will have been removed from job store
        # sys.exit replaces the builtin exit(), which the site module supplies.
        sys.exit(1)
def main() -> None:
    """Reports the state of a Toil workflow.

    Builds a ``ToilStatus`` for the job store and the optional --jobs
    subset, then prints whichever reports the option flags select.

    Returns without reporting if the job store does not exist (a message
    is printed) or if ``ToilStatus`` raises ``JobException``. Exits with
    status 1 if --failIfNotComplete is given and any jobs were gathered.
    """
    parser = parser_with_common_options()
    parser.add_argument(
        "--failIfNotComplete", action="store_true",
        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--noAggStats", dest="stats", action="store_false",
        help="Do not print overall, aggregate status of workflow.",
        default=True)

    parser.add_argument(
        "--printDot", action="store_true",
        help="Print dot formatted description of the graph. If using --jobs will "
             "restrict to subgraph including only those jobs. default=%(default)s",
        default=False)

    parser.add_argument(
        "--jobs", nargs='+',
        help="Restrict reporting to the following jobs (allows subsetting of the report).",
        default=None)

    parser.add_argument("--printPerJobStats", action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument(
        "--printLogs", action="store_true",
        help="Print the log files of jobs (if they exist). default=%(default)s",
        default=False)

    parser.add_argument("--printChildren", action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    options = parser.parse_args()
    set_logging_from_options(options)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)

    try:
        status = ToilStatus(config.jobStore, options.jobs)
    except NoSuchJobStoreException:
        print('No job store found.')
        return
    except JobException:  # Workflow likely complete, user informed in ToilStatus()
        return

    jobStats = status.report_on_jobs()

    # Info to be reported.
    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        status.printAggregateJobStats(properties, childNumber)
    if options.printLogs:
        status.printJobLog()
    if options.printChildren:
        status.printJobChildren()
    if options.printDot:
        status.print_dot_chart()
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(status.jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
               len(hasServices), len(services), len(hasLogFile), status.jobStore))

    if len(status.jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        # sys.exit replaces the builtin exit(), which the site module supplies.
        sys.exit(1)