示例#1
0
def main():
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs(jobID)
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." % str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid2kill))
            raise
示例#2
0
def main():
    parser = parser_with_common_options(jobstore_option=True)
    parser.add_argument(
        "jobID",
        nargs=1,
        help=
        "The job store id of a job within the provided jobstore to run by itself."
    )
    parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help=
        "Return information about this job to the user including preceding jobs, "
        "inputs, outputs, and runtime from the last known run.")

    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    jobStore = Toil.resumeJobStore(config.jobStore)

    if options.printJobInfo:
        printContentsOfJobStore(jobStorePath=config.jobStore,
                                nameOfJob=options.printJobInfo)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    jobID = options.jobID[0]
    logger.debug(f"Running the following job locally: {jobID}")
    workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
    logger.debug(f"Finished running: {jobID}")
示例#3
0
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help=
        "The location of the job store used by the workflow whose jobs should "
        "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info(
        "Starting routine to kill running jobs in the toil workflow: %s",
        config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(
        jobStore.config
    )  #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs(
    ):  #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
示例#4
0
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store to delete. " + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    logger.info("Attempting to delete the job store")
    jobStore = Toil.getJobStore(config.jobStore)
    jobStore.destroy()
    logger.info("Successfully deleted the job store")
示例#5
0
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
示例#6
0
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
示例#7
0
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("--localFilePath",
                        nargs=1,
                        help="Location to which to copy job store files.")
    parser.add_argument("--fetch",
                        nargs="+",
                        help="List of job-store files to be copied locally."
                        "Use either explicit names (i.e. 'data.txt'), or "
                        "specify glob patterns (i.e. '*.txt')")
    parser.add_argument(
        "--listFilesInJobStore",
        help="Prints a list of the current files in the jobStore.")
    parser.add_argument(
        "--fetchEntireJobStore",
        help="Copy all job store files into a local directory.")
    parser.add_argument(
        "--useSymlinks",
        help="Creates symlink 'shortcuts' of files in the localFilePath"
        " instead of hardlinking or copying, where possible.  If this is"
        " not possible, it will copy the files (shutil.copyfile()).")
    parser.add_argument("--version", action='version', version=version)

    # Load the jobStore
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    logger.debug("Connected to job store: %s", config.jobStore)

    if options.fetch:
        # Copy only the listed files locally
        logger.debug("Fetching local files: %s", options.fetch)
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    elif options.fetchEntireJobStore:
        # Copy all jobStore files locally
        logger.debug("Fetching all local files.")
        options.fetch = "*"
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    if options.listFilesInJobStore:
        # Log filenames and create a file containing these names in cwd
        printContentsOfJobStore(jobStorePath=options.jobStore)
示例#8
0
 def testMultipleJobsPerWorkerStats(self):
     """
     Tests case where multiple jobs are run on 1 worker to insure that all jobs report back their data
     """
     options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
     options.clean = 'never'
     options.stats = True
     Job.Runner.startToil(RunTwoJobsPerWorker(), options)
     config = Config()
     config.setOptions(options)
     jobStore = Toil.resumeJobStore(config.jobStore)
     stats = getStats(jobStore)
     collatedStats = processData(jobStore.config, stats)
     self.assertTrue(len(collatedStats.job_types) == 2,
                     "Some jobs are not represented in the stats")
示例#9
0
 def testMultipleJobsPerWorkerStats(self):
     """
     Tests case where multiple jobs are run on 1 worker to insure that all jobs report back their data
     """
     options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
     options.clean = 'never'
     options.stats = True
     Job.Runner.startToil(RunTwoJobsPerWorker(), options)
     config = Config()
     config.setOptions(options)
     jobStore = Toil.resumeJobStore(config.jobStore)
     stats = getStats(jobStore)
     collatedStats = processData(jobStore.config, stats)
     self.assertTrue(len(collatedStats.job_types) == 2,
                     "Some jobs are not represented in the stats")
示例#10
0
文件: pipeline.py 项目: eharkins/cft
def main():
    """
    This is a Toil pipeline to transfer TCGA data into an S3 Bucket

    Data is pulled down with Genetorrent and transferred to S3 via S3AM.
    """

    # Define Parser object and add to toil
    def existing_file(fname):
        """
        Argparse type for an existing file
        """
        if not os.path.isfile(fname):
            raise ValueError("Invalid file: " + str(fname))
        return fname

    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '--sudo',
        dest='sudo',
        default=None,
        action='store_true',
        help=
        'Docker usually needs sudo to execute locally, but not when running Mesos or when '
        'the user is a member of a Docker group.')
    Job.Runner.addToilOptions(parser)
    parser.add_argument('datafiles',
                        nargs='+',
                        help='FASTA input',
                        type=existing_file)

    args = parser.parse_args()

    assert args.jobStore is not None
    config = Config()
    config.setOptions(args)

    # Store inputs from argparse
    inputs = {'sudo': args.sudo}
    datafiles = [os.path.abspath(d) for d in args.datafiles]
    # Start Pipeline
    options = Job.Runner.getDefaultOptions("./toilWorkflow")

    Job.Runner.startToil(Job.wrapJobFn(start_batch, datafiles, inputs),
                         options)
示例#11
0
文件: toilKill.py 项目: Duke-GCB/toil
def main():
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config) #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs(): #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
示例#12
0
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore",
                        type=str,
                        help="The location of the job store to delete. " +
                        jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    try:
        jobStore = Toil.getJobStore(config.jobStore)
        jobStore.resume()
        jobStore.destroy()
        logger.info("Successfully deleted the job store: %s" % config.jobStore)
    except NoSuchJobStoreException:
        logger.info("Failed to delete the job store: %s is non-existent" %
                    config.jobStore)
    except:
        logger.info("Failed to delete the job store: %s" % config.jobStore)
        raise
示例#13
0
def main() -> None:
    """Reports stats on the workflow, use with --stats option to toil."""
    parser = parser_with_common_options()
    add_stats_options(parser)
    options = parser.parse_args()

    for c in options.categories.split(","):
        if c.strip() not in category_choices:
            raise ValueError(f'{c} not in {category_choices}!')
    options.categories = [
        x.strip().lower() for x in options.categories.split(",")
    ]

    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
示例#14
0
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("jobID",
                        nargs=1,
                        help="The job store id of a job "
                        "within the provided jobstore to run by itself.")
    parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help="Return information about this job to the user"
        " including preceding jobs, inputs, outputs, and runtime"
        " from the last known run.")
    parser.add_argument("--version", action='version', version=version)

    # Parse options
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)

    # Load the job store
    jobStore = Toil.resumeJobStore(config.jobStore)

    if options.printJobInfo:
        printContentsOfJobStore(jobStorePath=options.jobStore,
                                nameOfJob=options.printJobInfo)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    # Run the job locally
    jobID = options.jobID[0]
    logger.debug("Going to run the following job locally: %s", jobID)
    workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
    logger.debug("Ran the following job locally: %s", jobID)
示例#15
0
def main() -> None:
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    job_store_type, _ = Toil.parseLocator(config.jobStore)

    if job_store_type != 'file':
        # Remote (aws/google) jobstore; use the old (broken?) method
        job_store = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        # There's no guarantee that the batch system in use can enumerate
        # running jobs belonging to the job store we've attached to. And
        # moreover we don't even bother trying to kill the leader at its
        # recorded PID, even if it is a local process.
        batch_system = Toil.createBatchSystem(job_store.config)  # Should automatically kill existing jobs, so we're good.
        for job_id in batch_system.getIssuedBatchJobIDs():  # Just in case we do it again.
            batch_system.killBatchJobs([job_id])
        logger.info("All jobs SHOULD have been killed")
    else:
        # otherwise, kill the pid recorded in the jobstore.
        # TODO: We assume thnis is a local PID.
        job_store = Toil.resumeJobStore(config.jobStore)
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log) as f:
            pid_to_kill = f.read().strip()
        try:
            os.kill(int(pid_to_kill), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated." % str(pid_to_kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid_to_kill))
            raise
示例#16
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob )

    # The first element of the toilState.updatedJobs tuple is the jobWrapper we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [ job for job in totalJobs if job.remainingRetryCount == 0 ]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
示例#17
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob )

    # The first element of the toilState.updatedJobs tuple is the jobGraph we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [ job for job in totalJobs if job.remainingRetryCount == 0 ]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)
    
    if options.verbose: #Verbose currently means outputting the files that have failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warn)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
示例#18
0
def main():
    """Reports the state of the toil.
    """

    ##########################################
    #Construct the arguments.
    ##########################################

    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of a job store that holds the information about the "
        "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--failIfNotComplete",
        dest="failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    ##########################################
    #Do some checks.
    ##########################################

    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################

    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print(
            'The root job of the job store is absent, the workflow completed successfully.',
            file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        foundJobStoreIDs = set()
        totalJobs = []

        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in map(lambda x: x.jobStoreID, jobs):
                    if (successorJobStoreID not in foundJobStoreIDs
                            and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in map(lambda x: x.jobStoreID, jobs):
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))

        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunnning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID != 0 and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunnning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info(
        'There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
        'and %i totally failed jobs currently in %s.' %
        (len(totalJobs), len(hasChildren), len(hasServices), len(services),
         len(failedJobs), config.jobStore))

    if currentlyRunnning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunnning),
                    ' \n'.join(map(str, currentlyRunnning)))

    if options.verbose:  #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join(
                (str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        exit(
            1
        )  # when the workflow is complete, all jobs will have been removed from job store
示例#19
0
def main():
    """Reports the state of a Toil workflow."""
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument("--failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)

    parser.add_argument("--noAggStats", dest="stats", action="store_false",
                        help="Do not print overall, aggregate status of workflow.",
                        default=True)

    parser.add_argument("--printDot", action="store_true",
                        help="Print dot formatted description of the graph. If using --jobs will "
                             "restrict to subgraph including only those jobs. default=%(default)s",
                        default=False)

    parser.add_argument("--jobs", nargs='+',
                        help="Restrict reporting to the following jobs (allows subsetting of the report).",
                        default=None)

    parser.add_argument("--printPerJobStats", action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument("--printLogs", action="store_true",
                        help="Print the log files of jobs (if they exist). default=%(default)s",
                        default=False)

    parser.add_argument("--printChildren", action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    parser.add_argument("--version", action='version', version=version)

    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    ##########################################
    # Gather the jobs to report
    ##########################################

    # Gather all jobs in the workflow in jobsToReport
    if options.jobs == None:
        rootJob = fetchRootJob(jobStore)
        logger.info('Traversing the job graph gathering jobs. This may take a couple of minutes.')
        jobsToReport = traverseJobGraph(rootJob, jobStore)

    # Only gather jobs specified in options.jobs
    else:
        jobsToReport = fetchUserJobs(jobStore, jobs=options.jobs)

    ##########################################
    # Report on the jobs
    ##########################################

    jobStats = report_on_jobs(jobsToReport, jobStore, options)

    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        printAggregateJobStats(jobsToReport, properties, childNumber)

    if options.printLogs:
        printJobLog(jobsToReport, jobStore)

    if options.printChildren:
        printJobChildren(jobsToReport)

    if options.printDot:
        print_dot_chart(jobsToReport, jobStore_name=config.jobStore)

    if options.stats:
        print('Of the %i jobs considered, '
           'there are %i jobs with children, '
           '%i jobs ready to run, '
           '%i zombie jobs, '
           '%i jobs with services, '
           '%i services, '
           'and %i jobs with log files currently in %s.' %
            (len(jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
             len(hasServices), len(services), len(hasLogFile), config.jobStore))

    if len(jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
示例#20
0
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent, the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        foundJobStoreIDs = set()
        totalJobs = []
        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in [x.jobStoreID for x in jobs]:
                    if (successorJobStoreID not in foundJobStoreIDs and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in [x.jobStoreID for x in jobs]:
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))
        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunnning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID != 0 and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunnning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info('There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
                'and %i totally failed jobs currently in %s.' %
                (len(totalJobs), len(hasChildren), len(hasServices), len(services), len(failedJobs), config.jobStore))

    if currentlyRunnning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunnning), ' \n'.join(map(str, currentlyRunnning)))

    if options.verbose: #Verbose currently means outputting the files that have failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join((str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        exit(1) # when the workflow is complete, all jobs will have been removed from job store
示例#21
0
文件: toilStatus.py 项目: mr-c/toil
def main() -> None:
    """Reports the state of a Toil workflow."""
    parser = parser_with_common_options()
    parser.add_argument(
        "--failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--noAggStats",
        dest="stats",
        action="store_false",
        help="Do not print overall, aggregate status of workflow.",
        default=True)

    parser.add_argument(
        "--printDot",
        action="store_true",
        help=
        "Print dot formatted description of the graph. If using --jobs will "
        "restrict to subgraph including only those jobs. default=%(default)s",
        default=False)

    parser.add_argument(
        "--jobs",
        nargs='+',
        help=
        "Restrict reporting to the following jobs (allows subsetting of the report).",
        default=None)

    parser.add_argument("--printPerJobStats",
                        action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument(
        "--printLogs",
        action="store_true",
        help="Print the log files of jobs (if they exist). default=%(default)s",
        default=False)

    parser.add_argument("--printChildren",
                        action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    options = parser.parse_args()
    set_logging_from_options(options)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)

    try:
        status = ToilStatus(config.jobStore, options.jobs)
    except NoSuchJobStoreException:
        print('No job store found.')
        return
    except JobException:  # Workflow likely complete, user informed in ToilStatus()
        return

    jobStats = status.report_on_jobs()

    # Info to be reported.
    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        status.printAggregateJobStats(properties, childNumber)
    if options.printLogs:
        status.printJobLog()
    if options.printChildren:
        status.printJobChildren()
    if options.printDot:
        status.print_dot_chart()
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(status.jobsToReport), len(hasChildren), len(readyToRun),
               len(zombies), len(hasServices), len(services), len(hasLogFile),
               status.jobStore))

    if len(status.jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)