Example #1
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help=
        "The location of the job store used by the workflow whose jobs should "
        "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info(
        "Starting routine to kill running jobs in the toil workflow: %s",
        config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config)  #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs():  #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
Example #2
def main():
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
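    # Strip a leading 'file:' locator prefix, if present, so the ':' check below
    # only matches cloud (aws/google) job store locators.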
    config.jobStore = config.jobStore[5:] if config.jobStore.startswith('file:') else config.jobStore

    # ':' means an aws/google jobstore; use the old (broken?) method
    if ':' in config.jobStore:
        jobStore = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        batchSystem = Toil.createBatchSystem(jobStore.config)  # Should automatically kill existing jobs, so we're good.
        for jobID in batchSystem.getIssuedBatchJobIDs():  # Just in case we do it again.
            batchSystem.killBatchJobs(jobID)
        logger.info("All jobs SHOULD have been killed")
    # otherwise, kill the pid recorded in the jobstore
    else:
        pid_log = os.path.join(os.path.abspath(config.jobStore), 'pid.log')
        with open(pid_log, 'r') as f:
            pid2kill = f.read().strip()
        try:
            os.kill(int(pid2kill), signal.SIGKILL)
            logger.info("Toil process %s successfully terminated." % str(pid2kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid2kill))
            raise
Example #3
def main():
    parser = parser_with_common_options(jobstore_option=True)
    parser.add_argument(
        "jobID",
        nargs=1,
        help=
        "The job store id of a job within the provided jobstore to run by itself."
    )
    parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help=
        "Return information about this job to the user including preceding jobs, "
        "inputs, outputs, and runtime from the last known run.")

    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    jobStore = Toil.resumeJobStore(config.jobStore)

    if options.printJobInfo:
        printContentsOfJobStore(jobStorePath=config.jobStore,
                                nameOfJob=options.printJobInfo)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    jobID = options.jobID[0]
    logger.debug(f"Running the following job locally: {jobID}")
    workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
    logger.debug(f"Finished running: {jobID}")
Example #4
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore", type=str,
                        help="The location of the job store to delete. " + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    logger.info("Attempting to delete the job store")
    jobStore = Toil.getJobStore(config.jobStore)
    jobStore.destroy()
    logger.info("Successfully deleted the job store")
Example #5
        def _createDummyConfig():
            """
            Returns a dummy config for the batch system tests.  We need a workflowID to be set up
            since we are running tests without setting up a jobstore.

            :rtype: toil.common.Config
            """
            config = Config()
            from uuid import uuid4
            config.workflowID = str(uuid4())
            return config
Example #6
        def createConfig(cls):
            """
            Returns a dummy config for the batch system tests.  We need a workflowID to be set up
            since we are running tests without setting up a jobstore. This is the class version
            to be used when an instance is not available.

            :rtype: toil.common.Config
            """
            config = Config()
            from uuid import uuid4
            config.workflowID = str(uuid4())
            return config
Example #7
def main():
    """ Reports stats on the workflow, use with --stats option to toil.
    """
    parser = getBasicOptionParser()
    initializeOptions(parser)
    options = parseBasicOptions(parser)
    checkOptions(options, parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
Example #8
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("--localFilePath",
                        nargs=1,
                        help="Location to which to copy job store files.")
    parser.add_argument("--fetch",
                        nargs="+",
                        help="List of job-store files to be copied locally."
                        "Use either explicit names (i.e. 'data.txt'), or "
                        "specify glob patterns (i.e. '*.txt')")
    parser.add_argument(
        "--listFilesInJobStore",
        help="Prints a list of the current files in the jobStore.")
    parser.add_argument(
        "--fetchEntireJobStore",
        help="Copy all job store files into a local directory.")
    parser.add_argument(
        "--useSymlinks",
        help="Creates symlink 'shortcuts' of files in the localFilePath"
        " instead of hardlinking or copying, where possible.  If this is"
        " not possible, it will copy the files (shutil.copyfile()).")
    parser.add_argument("--version", action='version', version=version)

    # Load the jobStore
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    logger.debug("Connected to job store: %s", config.jobStore)

    if options.fetch:
        # Copy only the listed files locally
        logger.debug("Fetching local files: %s", options.fetch)
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    elif options.fetchEntireJobStore:
        # Copy all jobStore files locally
        logger.debug("Fetching all local files.")
        options.fetch = "*"
        fetchJobStoreFiles(jobStore=jobStore, options=options)

    if options.listFilesInJobStore:
        # Log filenames and create a file containing these names in cwd
        printContentsOfJobStore(jobStorePath=options.jobStore)
Example #9
 def testMultipleJobsPerWorkerStats(self):
     """
     Tests the case where multiple jobs are run on one worker to ensure that all jobs report back their data
     """
     options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
     options.clean = 'never'
     options.stats = True
     Job.Runner.startToil(RunTwoJobsPerWorker(), options)
     config = Config()
     config.setOptions(options)
     jobStore = Toil.resumeJobStore(config.jobStore)
     stats = getStats(jobStore)
     collatedStats = processData(jobStore.config, stats)
     self.assertTrue(len(collatedStats.job_types) == 2,
                     "Some jobs are not represented in the stats")
Example #10
def main():
    """
    This is a Toil pipeline to transfer TCGA data into an S3 Bucket

    Data is pulled down with Genetorrent and transferred to S3 via S3AM.
    """

    # Define Parser object and add to toil
    def existing_file(fname):
        """
        Argparse type for an existing file
        """
        if not os.path.isfile(fname):
            raise ValueError("Invalid file: " + str(fname))
        return fname

    parser = argparse.ArgumentParser(
        description=main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '--sudo',
        dest='sudo',
        default=None,
        action='store_true',
        help=
        'Docker usually needs sudo to execute locally, but not when running Mesos or when '
        'the user is a member of a Docker group.')
    Job.Runner.addToilOptions(parser)
    parser.add_argument('datafiles',
                        nargs='+',
                        help='FASTA input',
                        type=existing_file)

    args = parser.parse_args()

    assert args.jobStore is not None
    config = Config()
    config.setOptions(args)

    # Store inputs from argparse
    inputs = {'sudo': args.sudo}
    datafiles = [os.path.abspath(d) for d in args.datafiles]
    # Start Pipeline
    options = Job.Runner.getDefaultOptions("./toilWorkflow")

    Job.Runner.startToil(Job.wrapJobFn(start_batch, datafiles, inputs),
                         options)
Example #11
 def setUp(self):
     super(WorkerTests, self).setUp()
     path = self._getTestJobStorePath()
     self.jobStore = FileJobStore(path)
     self.config = Config()
     self.config.jobStore = 'file:%s' % path
     self.jobStore.initialize(self.config)
     self.jobNumber = 0
Example #12
def main():
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of the job store used by the workflow whose jobs should "
                             "be killed." + jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
    ####This behaviour is now broken
    batchSystem = Toil.createBatchSystem(jobStore.config) #This should automatically kill the existing jobs.. so we're good.
    for jobID in batchSystem.getIssuedBatchJobIDs(): #Just in case we do it again.
        batchSystem.killBatchJobs(jobID)
    logger.info("All jobs SHOULD have been killed")
Example #13
def main():
    parser = getBasicOptionParser()
    parser.add_argument("jobStore",
                        type=str,
                        help="The location of the job store to delete. " +
                        jobStoreLocatorHelp)
    parser.add_argument("--version", action='version', version=version)
    config = Config()
    config.setOptions(parseBasicOptions(parser))
    try:
        jobStore = Toil.getJobStore(config.jobStore)
        jobStore.resume()
        jobStore.destroy()
        logger.info("Successfully deleted the job store: %s" % config.jobStore)
    except NoSuchJobStoreException:
        logger.info("Failed to delete the job store: %s is non-existent" %
                    config.jobStore)
    except:
        logger.info("Failed to delete the job store: %s" % config.jobStore)
        raise
Example #14
def main() -> None:
    """Reports stats on the workflow, use with --stats option to toil."""
    parser = parser_with_common_options()
    add_stats_options(parser)
    options = parser.parse_args()

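    # Validate the requested stats categories, then normalize them to a lowercase list.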
    for c in options.categories.split(","):
        if c.strip() not in category_choices:
            raise ValueError(f'{c} not in {category_choices}!')
    options.categories = [
        x.strip().lower() for x in options.categories.split(",")
    ]

    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)
    stats = getStats(jobStore)
    collatedStatsTag = processData(jobStore.config, stats)
    reportData(collatedStatsTag, options)
Example #15
def main():
    parser = getBasicOptionParser()

    parser.add_argument(
        "jobStore",
        type=str,
        help="The location of the job store used by the workflow." +
        jobStoreLocatorHelp)
    parser.add_argument("jobID",
                        nargs=1,
                        help="The job store id of a job "
                        "within the provided jobstore to run by itself.")
    parser.add_argument(
        "--printJobInfo",
        nargs=1,
        help="Return information about this job to the user"
        " including preceding jobs, inputs, outputs, and runtime"
        " from the last known run.")
    parser.add_argument("--version", action='version', version=version)

    # Parse options
    options = parseBasicOptions(parser)
    config = Config()
    config.setOptions(options)

    # Load the job store
    jobStore = Toil.resumeJobStore(config.jobStore)

    if options.printJobInfo:
        printContentsOfJobStore(jobStorePath=options.jobStore,
                                nameOfJob=options.printJobInfo)

    # TODO: Option to print list of successor jobs
    # TODO: Option to run job within python debugger, allowing step through of arguments
    # idea would be to have option to import pdb and set breakpoint at the start of the user's code

    # Run the job locally
    jobID = options.jobID[0]
    logger.debug("Going to run the following job locally: %s", jobID)
    workerScript(jobStore, config, jobID, jobID, redirectOutputToLogFile=False)
    logger.debug("Ran the following job locally: %s", jobID)
Example #16
    def setUp(self):
        super(ClusterScalerTest, self).setUp()
        self.config = Config()
        self.config.targetTime = 1800
        self.config.nodeTypes = ['r3.8xlarge', 'c4.8xlarge:0.6']
        # Set up a stub provisioner with some nodeTypes and nodeShapes.
        # A bare object() does not accept attribute assignment, so use a
        # SimpleNamespace (requires `import types`) as the stub.
        self.provisioner = types.SimpleNamespace()
        self.provisioner.nodeTypes = ['r3.8xlarge', 'c4.8xlarge']
        self.provisioner.nodeShapes = [r3_8xlarge, c4_8xlarge_preemptable]
        self.provisioner.setStaticNodes = lambda _, __: None
        self.provisioner.retryPredicate = lambda _: False

        self.leader = MockBatchSystemAndProvisioner(self.config, 1)
Example #17
    def setUp(self):
        super(ClusterScalerTest, self).setUp()
        self.config = Config()
        self.config.targetTime = 1800
        self.config.nodeTypes = [r3_8xlarge, c4_8xlarge_preemptable]

        # Set up the mock leader
        self.leader = MockBatchSystemAndProvisioner(self.config, 1)
        # It is also a full mock provisioner, so configure it to be that as well
        self.provisioner = self.leader
        # Pretend that Shapes are actually strings we can use for instance type names.
        self.provisioner.setAutoscaledNodeTypes([
            ({t}, None) for t in self.config.nodeTypes
        ])
Example #18
def main() -> None:
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    job_store_type, _ = Toil.parseLocator(config.jobStore)

    if job_store_type != 'file':
        # Remote (aws/google) jobstore; use the old (broken?) method
        job_store = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        # There's no guarantee that the batch system in use can enumerate
        # running jobs belonging to the job store we've attached to. And
        # moreover we don't even bother trying to kill the leader at its
        # recorded PID, even if it is a local process.
        batch_system = Toil.createBatchSystem(job_store.config)  # Should automatically kill existing jobs, so we're good.
        for job_id in batch_system.getIssuedBatchJobIDs():  # Just in case we do it again.
            batch_system.killBatchJobs([job_id])
        logger.info("All jobs SHOULD have been killed")
    else:
        # otherwise, kill the pid recorded in the jobstore.
        # TODO: We assume this is a local PID.
        job_store = Toil.resumeJobStore(config.jobStore)
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log) as f:
            pid_to_kill = f.read().strip()
        try:
            os.kill(int(pid_to_kill), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated." % str(pid_to_kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid_to_kill))
            raise
Example #19
 def test(self):
     # We'll use fractions to avoid rounding errors. Remember that not every fraction can be
     # represented as a floating point number.
     F = Fraction
     # This test isn't general enough to cover every possible value of minCores in
     # SingleMachineBatchSystem. Instead we hard-code a value and assert it.
     minCores = F(1, 10)
     self.assertEquals(float(minCores), SingleMachineBatchSystem.minCores)
     for maxCores in {F(minCores), minCores * 10, F(1), F(numCores, 2), F(numCores)}:
         for coresPerJob in {F(minCores), F(minCores * 10), F(1), F(maxCores, 2), F(maxCores)}:
             for load in (F(1, 10), F(1), F(10)):
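                 # Issue as many jobs as can run concurrently (maxCores / coresPerJob),
                 # scaled by the load factor.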
                 jobs = int(maxCores / coresPerJob * load)
                 if jobs >= 1 and minCores <= coresPerJob < maxCores:
                     self.assertEquals(maxCores, float(maxCores))
                     bs = SingleMachineBatchSystem(config=Config(),
                                                   maxCores=float(maxCores),
                                                   # Ensure that memory or disk requirements
                                                   # don't get in the way.
                                                   maxMemory=jobs * 10,
                                                   maxDisk=jobs * 10)
                     try:
                         jobIds = set()
                         for i in range(0, int(jobs)):
                             jobIds.add(bs.issueBatchJob(command=self.scriptCommand(),
                                                         cores=float(coresPerJob),
                                                         memory=1,
                                                         disk=1,
                                                         preemptable=preemptable))
                         self.assertEquals(len(jobIds), jobs)
                         while jobIds:
                             job = bs.getUpdatedBatchJob(maxWait=10)
                             self.assertIsNotNone(job)
                             jobId, status, wallTime = job
                             self.assertEquals(status, 0)
                             # would raise KeyError on absence
                             jobIds.remove(jobId)
                     finally:
                         bs.shutdown()
                     concurrentTasks, maxConcurrentTasks = getCounters(self.counterPath)
                     self.assertEquals(concurrentTasks, 0)
                     log.info('maxCores: {maxCores}, '
                              'coresPerJob: {coresPerJob}, '
                              'load: {load}'.format(**locals()))
                     # This is the key assertion:
                     expectedMaxConcurrentTasks = min(maxCores / coresPerJob, jobs)
                     self.assertEquals(maxConcurrentTasks, expectedMaxConcurrentTasks)
                     resetCounters(self.counterPath)
Example #20
 def _createDummyConfig():
     config = Config()
     """
     config = ElementTree.Element("config")
     config.attrib["log_level"] = 'DEBUG'
     config.attrib["job_store"] = '.'
     config.attrib["parasol_command"] = None
     config.attrib["try_count"] = str(2)
     config.attrib["max_job_duration"] = str(1)
     config.attrib["batch_system"] = None
     config.attrib["max_log_file_size"] = str(1)
     config.attrib["default_memory"] = str(1)
     config.attrib["default_cores"] = str(1)
     config.attrib["max_cores"] = str(1)
     config.attrib["max_memory"] = str(1)
     config.attrib["scale"] = str(1)
     """
     return config
Example #21
    def setUp(self):
        super(ClusterScalerTest, self).setUp()
        self.config = Config()
        self.config.targetTime = 1800
        self.config.nodeTypes = ['r3.8xlarge', 'c4.8xlarge:0.6']
        # Set up a stub provisioner with some nodeTypes and nodeShapes.
        try:
            # In Python 3 we can use a SimpleNamespace as a mock provisioner
            self.provisioner = types.SimpleNamespace()
        except:
            # In Python 2 we can just tack fields onto an object
            self.provisioner = object()
        setattr(self.provisioner, 'nodeTypes', ['r3.8xlarge', 'c4.8xlarge'])
        setattr(self.provisioner, 'nodeShapes',
                [r3_8xlarge, c4_8xlarge_preemptable])
        setattr(self.provisioner, 'setStaticNodes', lambda _, __: None)
        setattr(self.provisioner, 'retryPredicate', lambda _: False)

        self.leader = MockBatchSystemAndProvisioner(self.config, 1)
Example #22
    def setUp(self):
        super(ClusterScalerTest, self).setUp()
        self.config = Config()
        self.config.targetTime = 1800
        self.config.nodeTypes = ['r3.8xlarge', 'c4.8xlarge:0.6']
        # Set up a stub provisioner with some nodeTypes and nodeShapes.
        try:
            # In Python 3 we can use a SimpleNamespace as a mock provisioner
            self.provisioner = types.SimpleNamespace()
        except:
            # In Python 2 we should just be able to tack fields onto an object.
            # But this has been known to produce:
            # AttributeError: 'newobject' object has no attribute 'nodeTypes'
            # So we use an Argparse Namespace instead.
            import argparse
            self.provisioner = argparse.Namespace()
        setattr(self.provisioner, 'nodeTypes', ['r3.8xlarge', 'c4.8xlarge'])
        setattr(self.provisioner, 'nodeShapes',
                [r3_8xlarge, c4_8xlarge_preemptable])
        setattr(self.provisioner, 'setStaticNodes', lambda _, __: None)
        setattr(self.provisioner, 'retryPredicate', lambda _: False)

        self.leader = MockBatchSystemAndProvisioner(self.config, 1)
Example #23
 def _createDummyConfig(self):
     return Config()
Example #24
    def testClusterScalingMultipleNodeTypes(self):

        smallNode = Shape(20, 5, 10, 10, False)
        mediumNode = Shape(20, 10, 10, 10, False)
        largeNode = Shape(20, 20, 10, 10, False)

        numJobs = 100

        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.preemptableNodeTypes = []
        config.minPreemptableNodes = []
        config.maxPreemptableNodes = []  # No preemptable nodes

        # Make sure the node types don't have to be ordered
        config.nodeTypes = [largeNode, smallNode, mediumNode]
        config.minNodes = [0, 0, 0]
        config.maxNodes = [10, 10]  # test expansion of this list

        # Algorithm parameters
        config.targetTime = defaultTargetTime
        config.betaInertia = 0.1
        config.scaleInterval = 3

        mock = MockBatchSystemAndProvisioner(config, secondsPerJob=2.0)
        clusterScaler = ScalerThread(mock, mock, config)
        clusterScaler.start()
        mock.start()

        try:
            # Add batches of small and medium jobs
            for _ in range(numJobs):
                mock.addJob(jobShape=smallNode)
            for _ in range(numJobs):
                mock.addJob(jobShape=mediumNode)

            # Add completed jobs sized between the small and medium node shapes
            for i in range(1000):
                iJ = JobNode(jobStoreID=1,
                             requirements=dict(memory=random.choice(
                                 range(smallNode.memory, mediumNode.memory)),
                                               cores=mediumNode.cores,
                                               disk=largeNode.cores,
                                               preemptable=False),
                             command=None,
                             jobName='testClusterScaling',
                             unitName='')
                clusterScaler.addCompletedJob(iJ, random.choice(range(1, 10)))

            while mock.getNumberOfJobsIssued() > 0 or mock.getNumberOfNodes() > 0:
                logger.debug("%i nodes currently provisioned" %
                             mock.getNumberOfNodes())
                # Make sure there are no large nodes
                self.assertEqual(mock.getNumberOfNodes(nodeType=largeNode), 0)
                clusterScaler.check()
                time.sleep(0.5)
        finally:
            clusterScaler.shutdown()
            mock.shutDown()

        # Make sure jobs ran on both the small and medium node types
        self.assertTrue(mock.totalJobs > 0)
        self.assertTrue(mock.maxWorkers[smallNode] > 0)
        self.assertTrue(mock.maxWorkers[mediumNode] > 0)

        self.assertEqual(mock.maxWorkers[largeNode], 0)
Example #25
def main():
    """Reports the state of a Toil workflow."""
    parser = getBasicOptionParser()

    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)

    parser.add_argument("--failIfNotComplete", action="store_true",
                        help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                        default=False)

    parser.add_argument("--noAggStats", dest="stats", action="store_false",
                        help="Do not print overall, aggregate status of workflow.",
                        default=True)

    parser.add_argument("--printDot", action="store_true",
                        help="Print dot formatted description of the graph. If using --jobs will "
                             "restrict to subgraph including only those jobs. default=%(default)s",
                        default=False)

    parser.add_argument("--jobs", nargs='+',
                        help="Restrict reporting to the following jobs (allows subsetting of the report).",
                        default=None)

    parser.add_argument("--printPerJobStats", action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument("--printLogs", action="store_true",
                        help="Print the log files of jobs (if they exist). default=%(default)s",
                        default=False)

    parser.add_argument("--printChildren", action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    parser.add_argument("--version", action='version', version=version)

    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)
    jobStore = Toil.resumeJobStore(config.jobStore)

    ##########################################
    # Gather the jobs to report
    ##########################################

    # Gather all jobs in the workflow in jobsToReport
    if options.jobs == None:
        rootJob = fetchRootJob(jobStore)
        logger.info('Traversing the job graph gathering jobs. This may take a couple of minutes.')
        jobsToReport = traverseJobGraph(rootJob, jobStore)

    # Only gather jobs specified in options.jobs
    else:
        jobsToReport = fetchUserJobs(jobStore, jobs=options.jobs)

    ##########################################
    # Report on the jobs
    ##########################################

    jobStats = report_on_jobs(jobsToReport, jobStore, options)

    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        printAggregateJobStats(jobsToReport, properties, childNumber)

    if options.printLogs:
        printJobLog(jobsToReport, jobStore)

    if options.printChildren:
        printJobChildren(jobsToReport)

    if options.printDot:
        print_dot_chart(jobsToReport, jobStore_name=config.jobStore)

    if options.stats:
        print('Of the %i jobs considered, '
           'there are %i jobs with children, '
           '%i jobs ready to run, '
           '%i zombie jobs, '
           '%i jobs with services, '
           '%i services, '
           'and %i jobs with log files currently in %s.' %
            (len(jobsToReport), len(hasChildren), len(readyToRun), len(zombies),
             len(hasServices), len(services), len(hasLogFile), config.jobStore))

    if len(jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
Example #26
    def testClusterScalingWithPreemptableJobs(self):
        """
        Test scaling simultaneously for a batch of preemptable and non-preemptable jobs.
        """
        config = Config()

        jobShape = Shape(20, 10, 10, 10, False)
        preemptableJobShape = Shape(20, 10, 10, 10, True)

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # non-preemptable node parameters
        config.nodeTypes = [jobShape, preemptableJobShape]
        config.minNodes = [0, 0]
        config.maxNodes = [10, 10]

        # Algorithm parameters
        config.targetTime = defaultTargetTime
        config.betaInertia = 0.9
        config.scaleInterval = 3

        self._testClusterScaling(config,
                                 numJobs=100,
                                 numPreemptableJobs=100,
                                 jobShape=jobShape)
Example #27
def main(args=None, stdout=sys.stdout):
    config = Config()
    config.cwl = True
    parser = argparse.ArgumentParser()
    addOptions(parser, config)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--no-container", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const", const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    parser.add_argument("--user-space-docker-cmd",
                        help="(Linux/OS X only) Specify a user space docker "
                        "command (like udocker or dx-docker) that will be "
                        "used to call 'pull' and 'run'")
    parser.add_argument("--preserve-environment", type=str, nargs='+',
                    help="Preserve specified environment variables when running CommandLineTools",
                    metavar=("VAR1 VAR2"),
                    default=("PATH",),
                    dest="preserve_environment")
    # help="Dependency resolver configuration file describing how to adapt 'SoftwareRequirement' packages to current system."
    parser.add_argument("--beta-dependency-resolvers-configuration", default=None)
    # help="Defaut root directory used by dependency resolvers configuration."
    parser.add_argument("--beta-dependencies-directory", default=None)
    # help="Use biocontainers for tools without an explicitly annotated Docker container."
    parser.add_argument("--beta-use-biocontainers", default=None, action="store_true")
    # help="Short cut to use Conda to resolve 'SoftwareRequirement' packages."
    parser.add_argument("--beta-conda-dependencies", default=None, action="store_true")
    parser.add_argument("--tmpdir-prefix", type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix", type=Text,
                        help="Path prefix for intermediate output directories",
                        default="tmp")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    options = parser.parse_args([workdir] + args)

    use_container = not options.no_container

    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    fileindex = {}
    existing = {}
    make_tool_kwargs = {}
    conf_file = getattr(options, "beta_dependency_resolvers_configuration", None)  # Text
    use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)  # Text
    job_script_provider = None
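    # Configure a dependency resolver (e.g. Conda) when one of the beta dependency options is used.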
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(options)  # type: DependenciesConfiguration
        job_script_provider = dependencies_configuration

    options.default_container = None
    make_tool_kwargs["find_default_container"] = functools.partial(find_default_container, options)

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            useStrict = not options.not_strict
            make_tool_kwargs["hints"] = [{
                "class": "ResourceRequirement",
                "coresMin": toil.config.defaultCores,
                "ramMin": toil.config.defaultMemory / (2**20),
                "outdirMin": toil.config.defaultDisk / (2**20),
                "tmpdirMin": 0
            }]
            try:
                t = cwltool.load_tool.load_tool(options.cwltool, toilMakeTool,
                                                kwargs=make_tool_kwargs,
                                                resolver=cwltool.resolver.tool_resolver,
                                                strict=useStrict)
                unsupportedRequirementsCheck(t.requirements)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            if type(t) == int:
                return t

            options.workflow = options.cwltool
            options.job_order = options.cwljob
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job, options.basedir, loader = cwltool.main.load_job_order(
                options, sys.stdin, None, [], options.job_order)
            job = cwltool.main.init_job_order(job, options, t, loader=loader)

            fillInDefaults(t.tool["inputs"], job)

            def pathToLoc(p):
                if "location" not in p and "path" in p:
                    p["location"] = p["path"]
                    del p["path"]

            def importFiles(tool):
                visit_class(tool, ("File", "Directory"), pathToLoc)
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(get_listing,
                                                      cwltool.stdfsaccess.StdFsAccess(""),
                                                      recursive=True))
                adjustFileObjs(tool, functools.partial(uploadFile,
                                                       toil.importFile,
                                                       fileindex, existing, skip_broken=True))

            t.visit(importFiles)

            for inp in t.tool["inputs"]:
                def setSecondary(fileobj):
                    if isinstance(fileobj, dict) and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [{
                                "location": cwltool.builder.substitute(fileobj["location"], sf), "class": "File"}
                                                         for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, list):
                        for e in fileobj:
                            setSecondary(e)

                if shortname(inp["id"]) in job and inp.get("secondaryFiles"):
                    setSecondary(job[shortname(inp["id"])])

            importFiles(job)
            visitSteps(t, importFiles)

            try:
                make_opts = copy.deepcopy(vars(options))
                make_opts.update({'tool': t, 'jobobj': {},
                    'use_container': use_container,
                    'tmpdir': os.path.realpath(outdir),
                    'job_script_provider': job_script_provider})

                (wf1, wf2) = makeJob(**make_opts)
            except cwltool.process.UnsupportedRequirement as e:
                logging.error(e)
                return 33

            wf1.cwljob = job
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        toilStageFiles(toil, outobj, outdir, fileindex, existing, True)

        visit_class(outobj, ("File",), functools.partial(compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #28
def main() -> None:
    """Reports the state of a Toil workflow."""
    parser = parser_with_common_options()
    parser.add_argument(
        "--failIfNotComplete",
        action="store_true",
        help=
        "Return exit value of 1 if toil jobs not all completed. default=%(default)s",
        default=False)

    parser.add_argument(
        "--noAggStats",
        dest="stats",
        action="store_false",
        help="Do not print overall, aggregate status of workflow.",
        default=True)

    parser.add_argument(
        "--printDot",
        action="store_true",
        help=
        "Print dot formatted description of the graph. If using --jobs will "
        "restrict to subgraph including only those jobs. default=%(default)s",
        default=False)

    parser.add_argument(
        "--jobs",
        nargs='+',
        help=
        "Restrict reporting to the following jobs (allows subsetting of the report).",
        default=None)

    parser.add_argument("--printPerJobStats",
                        action="store_true",
                        help="Print info about each job. default=%(default)s",
                        default=False)

    parser.add_argument(
        "--printLogs",
        action="store_true",
        help="Print the log files of jobs (if they exist). default=%(default)s",
        default=False)

    parser.add_argument("--printChildren",
                        action="store_true",
                        help="Print children of each job. default=%(default)s",
                        default=False)

    options = parser.parse_args()
    set_logging_from_options(options)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    config = Config()
    config.setOptions(options)

    try:
        status = ToilStatus(config.jobStore, options.jobs)
    except NoSuchJobStoreException:
        print('No job store found.')
        return
    except JobException:  # Workflow likely complete, user informed in ToilStatus()
        return

    jobStats = status.report_on_jobs()

    # Info to be reported.
    hasChildren = jobStats['hasChildren']
    readyToRun = jobStats['readyToRun']
    zombies = jobStats['zombies']
    hasServices = jobStats['hasServices']
    services = jobStats['services']
    hasLogFile = jobStats['hasLogFile']
    properties = jobStats['properties']
    childNumber = jobStats['childNumber']

    if options.printPerJobStats:
        status.printAggregateJobStats(properties, childNumber)
    if options.printLogs:
        status.printJobLog()
    if options.printChildren:
        status.printJobChildren()
    if options.printDot:
        status.print_dot_chart()
    if options.stats:
        print('Of the %i jobs considered, '
              'there are %i jobs with children, '
              '%i jobs ready to run, '
              '%i zombie jobs, '
              '%i jobs with services, '
              '%i services, '
              'and %i jobs with log files currently in %s.' %
              (len(status.jobsToReport), len(hasChildren), len(readyToRun),
               len(zombies), len(hasServices), len(services), len(hasLogFile),
               status.jobStore))

    if len(status.jobsToReport) > 0 and options.failIfNotComplete:
        # Upon workflow completion, all jobs will have been removed from job store
        exit(1)
Example #29
def main(args=None, stdout=sys.stdout):
    """Main method for toil-cwl-runner."""
    cwllogger.removeHandler(defaultStreamHandler)
    config = Config()
    config.cwl = True
    parser = argparse.ArgumentParser()
    addOptions(parser, config)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enables
    # user to select jobStore or get a default from logic one below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    dockergroup = parser.add_mutually_exclusive_group()
    dockergroup.add_argument(
        "--user-space-docker-cmd",
        help="(Linux/OS X only) Specify a user space docker command (like "
        "udocker or dx-docker) that will be used to call 'pull' and 'run'")
    dockergroup.add_argument(
        "--singularity", action="store_true", default=False,
        help="[experimental] Use Singularity runtime for running containers. "
        "Requires Singularity v2.3.2+ and Linux with kernel version v3.18+ or "
        "with overlayfs support backported.")
    dockergroup.add_argument(
        "--no-container", action="store_true", help="Do not execute jobs in a "
        "Docker container, even when `DockerRequirement` "
        "is specified under `hints`.")
    parser.add_argument(
        "--preserve-environment", type=str, nargs='+',
        help="Preserve specified environment variables when running"
        " CommandLineTools", metavar=("VAR1 VAR2"), default=("PATH",),
        dest="preserve_environment")
    parser.add_argument(
        "--destBucket", type=str,
        help="Specify a cloud bucket endpoint for output files.")
    parser.add_argument(
        "--beta-dependency-resolvers-configuration", default=None)
    parser.add_argument("--beta-dependencies-directory", default=None)
    parser.add_argument(
        "--beta-use-biocontainers", default=None, action="store_true")
    parser.add_argument(
        "--beta-conda-dependencies", default=None, action="store_true")
    parser.add_argument("--tmpdir-prefix", type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix", type=Text,
                        help="Path prefix for intermediate output directories",
                        default="tmp")
    parser.add_argument(
        "--force-docker-pull", action="store_true", default=False,
        dest="force_docker_pull",
        help="Pull latest docker image even if it is locally present")
    parser.add_argument(
        "--no-match-user", action="store_true", default=False,
        help="Disable passing the current uid to `docker run --user`")

    # mkdtemp actually creates the directory, but
    # toil requires that the directory not exist,
    # so make it and delete it and allow
    # toil to create it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # we use workdir as jobStore:
    options = parser.parse_args([workdir] + args)

    # if tmpdir_prefix is not the default value, set workDir too
    if options.tmpdir_prefix != 'tmp':
        options.workDir = options.tmpdir_prefix

    if options.provisioner and not options.jobStore:
        raise NoSuchJobStoreException(
            'Please specify a jobstore with the --jobStore option when specifying a provisioner.')

    use_container = not options.no_container

    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
    tmpdir_prefix = os.path.abspath(options.tmpdir_prefix)

    fileindex = {}
    existing = {}
    conf_file = getattr(options,
                        "beta_dependency_resolvers_configuration", None)
    use_conda_dependencies = getattr(options, "beta_conda_dependencies", None)
    job_script_provider = None
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(options)
        job_script_provider = dependencies_configuration

    options.default_container = None
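    # Build the cwltool runtime and loading contexts from the parsed options.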
    runtime_context = cwltool.context.RuntimeContext(vars(options))
    runtime_context.find_default_container = functools.partial(
        find_default_container, options)
    runtime_context.workdir = workdir
    runtime_context.move_outputs = "leave"
    runtime_context.rm_tmpdir = False
    loading_context = cwltool.context.LoadingContext(vars(options))

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            loading_context.hints = [{
                "class": "ResourceRequirement",
                "coresMin": toil.config.defaultCores,
                "ramMin": toil.config.defaultMemory / (2**20),
                "outdirMin": toil.config.defaultDisk / (2**20),
                "tmpdirMin": 0
            }]
            loading_context.construct_tool_object = toil_make_tool
            loading_context.resolver = cwltool.resolver.tool_resolver
            loading_context.strict = not options.not_strict
            options.workflow = options.cwltool
            options.job_order = options.cwljob
            uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
                options.cwltool, loading_context.resolver,
                loading_context.fetcher_constructor)
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job_order_object, options.basedir, jobloader = \
                cwltool.main.load_job_order(
                    options, sys.stdin, loading_context.fetcher_constructor,
                    loading_context.overrides_list, tool_file_uri)
            document_loader, workflowobj, uri = \
                cwltool.load_tool.fetch_document(
                    uri, loading_context.resolver,
                    loading_context.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                cwltool.load_tool.validate_document(
                    document_loader, workflowobj, uri,
                    loading_context.enable_dev, loading_context.strict, False,
                    loading_context.fetcher_constructor, False,
                    loading_context.overrides_list,
                    do_validate=loading_context.do_validate)
            loading_context.overrides_list.extend(
                metadata.get("cwltool:overrides", []))
            try:
                tool = cwltool.load_tool.make_tool(
                    document_loader, avsc_names, metadata, uri,
                    loading_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33
            runtime_context.secret_store = SecretStore()
            initialized_job_order = cwltool.main.init_job_order(
                job_order_object, options, tool, jobloader, sys.stdout,
                secret_store=runtime_context.secret_store)
            fs_access = cwltool.stdfsaccess.StdFsAccess(options.basedir)
            fill_in_defaults(
                tool.tool["inputs"], initialized_job_order, fs_access)

            def path_to_loc(obj):
                if "location" not in obj and "path" in obj:
                    obj["location"] = obj["path"]
                    del obj["path"]

            def import_files(tool):
                visit_class(tool, ("File", "Directory"), path_to_loc)
                visit_class(tool, ("File", ), functools.partial(
                    add_sizes, fs_access))
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(
                    get_listing, fs_access, recursive=True))
                adjustFileObjs(tool, functools.partial(
                    uploadFile, toil.importFile, fileindex, existing,
                    skip_broken=True))

            tool.visit(import_files)

            for inp in tool.tool["inputs"]:
                def set_secondary(fileobj):
                    if isinstance(fileobj, Mapping) \
                            and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [
                                {"location": cwltool.builder.substitute(
                                    fileobj["location"], sf), "class": "File"}
                                for sf in inp["secondaryFiles"]]

                    if isinstance(fileobj, MutableSequence):
                        for entry in fileobj:
                            set_secondary(entry)

                if shortname(inp["id"]) in initialized_job_order \
                        and inp.get("secondaryFiles"):
                    set_secondary(initialized_job_order[shortname(inp["id"])])

            import_files(initialized_job_order)
            visitSteps(tool, import_files)

            try:
                runtime_context.use_container = use_container
                runtime_context.tmpdir = os.path.realpath(tmpdir_prefix)
                runtime_context.tmp_outdir_prefix = os.path.realpath(
                    tmp_outdir_prefix)
                runtime_context.job_script_provider = job_script_provider
                runtime_context.force_docker_pull = options.force_docker_pull
                runtime_context.no_match_user = options.no_match_user
                (wf1, _) = makeJob(tool, {}, None, runtime_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33

            wf1.cwljob = initialized_job_order
            if isinstance(wf1, CWLJob):  # Clean up temporary directories only created with CWLJobs.
                wf1.addFollowOnFn(cleanTempDirs, wf1)
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        # Stage files. Specify destination bucket if specified in CLI
        # options. If destination bucket not passed in,
        # options.destBucket's value will be None.
        toilStageFiles(
            toil,
            outobj,
            outdir,
            fileindex,
            existing,
            export=True,
            destBucket=options.destBucket)

        if not options.destBucket:
            visit_class(outobj, ("File",), functools.partial(
                compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        visit_class(outobj, ("File", ), MutationManager().unset_generation)
        stdout.write(json.dumps(outobj, indent=4))

    return 0
Example #30
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent; the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)

    def traverseGraph(jobGraph):
        foundJobStoreIDs = set()
        totalJobs = []
        def inner(jobGraph):
            if jobGraph.jobStoreID in foundJobStoreIDs:
                return
            foundJobStoreIDs.add(jobGraph.jobStoreID)
            totalJobs.append(jobGraph)
            # Traverse jobs in stack
            for jobs in jobGraph.stack:
                for successorJobStoreID in [x.jobStoreID for x in jobs]:
                    if (successorJobStoreID not in foundJobStoreIDs and jobStore.exists(successorJobStoreID)):
                        inner(jobStore.load(successorJobStoreID))

            # Traverse service jobs
            for jobs in jobGraph.services:
                for serviceJobStoreID in [x.jobStoreID for x in jobs]:
                    if jobStore.exists(serviceJobStoreID):
                        assert serviceJobStoreID not in foundJobStoreIDs
                        foundJobStoreIDs.add(serviceJobStoreID)
                        totalJobs.append(jobStore.load(serviceJobStoreID))
        inner(jobGraph)
        return totalJobs

    logger.info('Traversing the job graph. This may take a couple minutes.')
    totalJobs = traverseGraph(rootJob)

    failedJobs = []
    hasChildren = []
    hasServices = []
    services = []
    currentlyRunning = []

    for job in totalJobs:
        if job.logJobStoreFileID is not None:
            failedJobs.append(job)
        if job.stack:
            hasChildren.append(job)
        elif job.remainingRetryCount != 0 and job.logJobStoreFileID is None and job.command:
            # The job has no children, hasn't failed, and has a command to run. This indicates that the job is
            # likely currently running, or at least could be run.
            currentlyRunning.append(job)
        if job.services:
            hasServices.append(job)
        if job.startJobStoreID or job.terminateJobStoreID or job.errorJobStoreID:
            # these attributes are only set in service jobs
            services.append(job)

    logger.info('There are %i unfinished jobs, %i parent jobs with children, %i jobs with services, %i services, '
                'and %i totally failed jobs currently in %s.' %
                (len(totalJobs), len(hasChildren), len(hasServices), len(services), len(failedJobs), config.jobStore))

    if currentlyRunning:
        logger.info('These %i jobs are currently active: %s',
                    len(currentlyRunning), ' \n'.join(map(str, currentlyRunning)))

    if options.verbose:  # Verbose currently means outputting the log files of jobs that failed.
        if failedJobs:
            msg = "Outputting logs for the %i failed jobs" % (len(failedJobs))
            msg += ": %s" % ", ".join((str(failedJob) for failedJob in failedJobs))
            for jobNode in failedJobs:
                job = jobStore.load(jobNode.jobStoreID)
                msg += "\n=========> Failed job %s \n" % jobNode
                with job.getLogFileHandle(jobStore) as fH:
                    msg += fH.read()
                msg += "<=========\n"
            print(msg)
        else:
            print('There are no failed jobs to report.', file=sys.stderr)

    if totalJobs and options.failIfNotComplete:
        sys.exit(1)  # When the workflow is complete, all jobs will have been removed from the job store.
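
# --- Illustrative sketch, not part of the toil status code above. ---
# traverseGraph() collects every job reachable from the root exactly once by
# recording already-seen job store IDs before following successor and service
# references.  The same deduplicating depth-first pattern on a toy in-memory
# graph (ToyJob is a stand-in for the real jobGraph records) looks like this:

class ToyJob:
    def __init__(self, jobStoreID, successors=None):
        self.jobStoreID = jobStoreID
        self.successors = successors or []

def collect_jobs(root):
    seen = set()
    ordered = []
    def inner(job):
        if job.jobStoreID in seen:
            return  # Already visited; shared successors are counted only once.
        seen.add(job.jobStoreID)
        ordered.append(job)
        for successor in job.successors:
            inner(successor)
    inner(root)
    return ordered

shared = ToyJob("c")
root = ToyJob("a", successors=[ToyJob("b", successors=[shared]), shared])
assert [j.jobStoreID for j in collect_jobs(root)] == ["a", "b", "c"]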
Example #35
    def testClusterScalingWithPreemptableJobs(self):
        """
        Test scaling simultaneously for a batch of preemptable and non-preemptable jobs.
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # Non-preemptable node parameters
        config.nodeType = Shape(20, 10, 10, 10)
        config.minNodes = 0
        config.maxNodes = 10

        # Preemptable node parameters
        config.preemptableNodeType = Shape(20, 10, 10, 10)
        config.minPreemptableNodes = 0
        config.maxPreemptableNodes = 10

        # Algorithm parameters
        config.alphaPacking = 0.8
        config.betaInertia = 1.2
        config.scaleInterval = 3

        self._testClusterScaling(config, numJobs=100, numPreemptableJobs=100)
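
# --- Illustrative sketch, not the scaler exercised by _testClusterScaling above. ---
# The test gives each node type a Shape bundling wall time and resource sizes,
# and jobs that each need the dummy defaultMemory/defaultCores/defaultDisk of 1.
# A back-of-the-envelope estimate of the node count such a batch needs,
# ignoring wall time and packing overhead (so NOT Toil's real bin-packing
# algorithm), can be computed like this:

import math

def jobs_per_node(node_memory, node_cores, node_disk,
                  job_memory, job_cores, job_disk):
    # The scarcest resource bounds how many jobs fit on one node at a time.
    return min(node_memory // job_memory,
               node_cores // job_cores,
               node_disk // job_disk)

def nodes_needed(num_jobs, per_node, max_nodes):
    return min(max_nodes, math.ceil(num_jobs / per_node))

# With the dummy values above (a node offering 10/10/10, jobs needing 1/1/1),
# ten jobs fit per node, so a batch of 100 jobs would ask for all 10 allowed nodes.
assert jobs_per_node(10, 10, 10, 1, 1, 1) == 10
assert nodes_needed(100, 10, 10) == 10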
Example #36
    def testClusterScaling(self):
        """
        Test scaling for a batch of non-preemptable jobs and no preemptable jobs (makes debugging
        easier).
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.maxPreemptableNodes = 0  # No preemptable nodes

        # Non-preemptable parameters
        config.nodeType = Shape(20, 10, 10, 10)
        config.minNodes = 0
        config.maxNodes = 10

        # Algorithm parameters
        config.alphaPacking = 0.8
        config.betaInertia = 1.2
        config.scaleInterval = 3

        self._testClusterScaling(config, numJobs=100, numPreemptableJobs=0)
Example #37
def main():
    """Reports the state of the toil.
    """
    
    ##########################################
    #Construct the arguments.
    ##########################################  
    
    parser = getBasicOptionParser()
    
    parser.add_argument("jobStore", type=str,
                        help="The location of a job store that holds the information about the "
                             "workflow whose status is to be reported on." + jobStoreLocatorHelp)
    
    parser.add_argument("--verbose", dest="verbose", action="store_true",
                      help="Print loads of information, particularly all the log files of \
                      jobs that failed. default=%(default)s",
                      default=False)
    
    parser.add_argument("--failIfNotComplete", dest="failIfNotComplete", action="store_true",
                      help="Return exit value of 1 if toil jobs not all completed. default=%(default)s",
                      default=False)
    parser.add_argument("--version", action='version', version=version)
    options = parseBasicOptions(parser)
    logger.info("Parsed arguments")
    
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)
    
    ##########################################
    #Do some checks.
    ##########################################
    
    logger.info("Checking if we have files for Toil")
    assert options.jobStore is not None
    config = Config()
    config.setOptions(options)
    ##########################################
    #Survey the status of the job and report.
    ##########################################  
    
    jobStore = Toil.resumeJobStore(config.jobStore)
    try:
        rootJob = jobStore.loadRootJob()
    except JobException:
        print('The root job of the job store is absent; the workflow completed successfully.',
              file=sys.stderr)
        sys.exit(0)
    
    toilState = ToilState(jobStore, rootJob)

    # The first element of the toilState.updatedJobs tuple is the jobGraph we want to inspect
    totalJobs = set(toilState.successorCounts.keys()) | \
                {jobTuple[0] for jobTuple in toilState.updatedJobs}

    failedJobs = [ job for job in totalJobs if job.remainingRetryCount == 0 ]

    print('There are %i active jobs, %i parent jobs with children, and %i totally failed jobs '
          'currently in %s.' % (len(toilState.updatedJobs), len(toilState.successorCounts),
                                len(failedJobs), config.jobStore), file=sys.stderr)
    
    if options.verbose:  # Verbose currently means outputting the log files of jobs that failed.
        for job in failedJobs:
            if job.logJobStoreFileID is not None:
                with job.getLogFileHandle(jobStore) as logFileHandle:
                    logStream(logFileHandle, job.jobStoreID, logger.warning)
            else:
                print('Log file for job %s is absent.' % job.jobStoreID, file=sys.stderr)
        if len(failedJobs) == 0:
            print('There are no failed jobs to report.', file=sys.stderr)
    
    if (len(toilState.updatedJobs) + len(toilState.successorCounts)) != 0 and \
        options.failIfNotComplete:
        sys.exit(1)
Example #38
    def _createConfig(self):
        return Config()
Example #39
    def testClusterScaling(self):
        """
        Test scaling for a batch of non-preemptable jobs and no preemptable jobs (makes debugging
        easier).
        """
        config = Config()

        # Make defaults dummy values
        config.defaultMemory = 1
        config.defaultCores = 1
        config.defaultDisk = 1

        # No preemptable nodes/jobs
        config.maxPreemptableNodes = []  # No preemptable nodes

        # Non-preemptable parameters
        config.nodeTypes = [Shape(20, 10, 10, 10, False)]
        config.minNodes = [0]
        config.maxNodes = [10]

        # Algorithm parameters
        config.targetTime = defaultTargetTime
        config.betaInertia = 0.1
        config.scaleInterval = 3

        self._testClusterScaling(config,
                                 numJobs=100,
                                 numPreemptableJobs=0,
                                 jobShape=config.nodeTypes[0])