def testConcurrencyWithDisk(self):
    """
    Tests that the batch system is allocating disk resources properly
    """
    tempDir = self._createTempDir('testFiles')

    options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
    options.workDir = tempDir
    from toil import physicalDisk
    availableDisk = physicalDisk(options.workDir)
    logger.info('Testing disk concurrency limits with %s disk space', availableDisk)
    # More disk might become available by the time Toil starts, so we limit it here
    options.maxDisk = availableDisk
    options.batchSystem = self.batchSystemName

    counterPath = os.path.join(tempDir, 'counter')
    resetCounters(counterPath)
    value, maxValue = getCounters(counterPath)
    assert (value, maxValue) == (0, 0)

    half_disk = availableDisk // 2
    more_than_half_disk = half_disk + 500
    logger.info('Dividing into parts of %s and %s', half_disk, more_than_half_disk)

    root = Job()
    # Physically, we're asking for 50% of disk and 50% of disk + 500 bytes in the two
    # jobs. The batch system should not allow the 2 child jobs to run concurrently.
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=half_disk))
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=more_than_half_disk))
    Job.Runner.startToil(root, options)
    _, maxValue = getCounters(counterPath)

    logger.info('After run: %s disk space', physicalDisk(options.workDir))

    self.assertEqual(maxValue, 1)
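# --- Illustrative sketch (not Toil's actual test helpers) ---
# The test above relies on measureConcurrency, resetCounters and getCounters, which are
# defined elsewhere. A minimal, hypothetical version of the pattern they implement: each
# job atomically bumps a shared counter file on entry, records the highest value it ever
# saw, sleeps, then decrements. If the recorded maximum never exceeds 1, the two jobs
# never ran at the same time, which is what assertEqual(maxValue, 1) checks.
import fcntl
import time


def resetCounters(path):
    with open(path, 'w') as f:
        f.write('0,0')


def getCounters(path):
    with open(path) as f:
        current, maximum = (int(i) for i in f.read().split(','))
    return current, maximum


def measureConcurrency(path, sleepTime=10):
    def update(delta):
        # Update the counter file under an exclusive lock and return the new maximum.
        with open(path, 'r+') as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            current, maximum = (int(i) for i in f.read().split(','))
            current += delta
            maximum = max(maximum, current)
            f.seek(0)
            f.truncate()
            f.write('%i,%i' % (current, maximum))
        return maximum

    update(+1)             # enter: one more job is running
    time.sleep(sleepTime)  # hold the resources for a while
    return update(-1)      # leave: report how many jobs ever overlapped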
def testConcurrencyWithDisk(self):
    """
    Tests that the batch system is allocating disk resources properly
    """
    tempDir = self._createTempDir('testFiles')
    options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
    options.workDir = tempDir
    from toil import physicalDisk
    availableDisk = physicalDisk('', toilWorkflowDir=options.workDir)
    options.batchSystem = self.batchSystemName

    counterPath = os.path.join(tempDir, 'counter')
    resetCounters(counterPath)
    value, maxValue = getCounters(counterPath)
    assert (value, maxValue) == (0, 0)

    root = Job()
    # Physically, we're asking for 50% of disk and 50% of disk + 500 bytes in the two
    # jobs. The batch system should not allow the 2 child jobs to run concurrently.
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=old_div(availableDisk, 2)))
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=old_div(availableDisk, 2) + 500))
    Job.Runner.startToil(root, options)
    _, maxValue = getCounters(counterPath)
    self.assertEqual(maxValue, 1)
def testConcurrencyWithDisk(self):
    """
    Tests that the batch system is allocating disk resources properly
    """
    tempDir = self._createTempDir('testFiles')
    options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
    options.workDir = tempDir
    from toil import physicalDisk
    availableDisk = physicalDisk('', toilWorkflowDir=options.workDir)
    options.batchSystem = self.batchSystemName

    counterPath = os.path.join(tempDir, 'counter')
    resetCounters(counterPath)
    value, maxValue = getCounters(counterPath)
    assert (value, maxValue) == (0, 0)

    root = Job()
    # Physically, we're asking for 50% of disk and 50% of disk + 500 bytes in the two
    # jobs. The batch system should not allow the 2 child jobs to run concurrently.
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=availableDisk // 2))
    root.addChild(Job.wrapFn(measureConcurrency, counterPath, self.sleepTime,
                             cores=1, memory='1M', disk=(availableDisk // 2) + 500))
    Job.Runner.startToil(root, options)
    _, maxValue = getCounters(counterPath)
    self.assertEqual(maxValue, 1)
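# Sanity check of the arithmetic all three variants of this test rely on (the disk size
# here is illustrative): the two children together always request 500 bytes more than
# the whole disk pool, so the batch system can admit at most one of them at a time.
availableDisk = 100 * 1024 ** 3          # pretend 100 GiB is available
half_disk = availableDisk // 2
more_than_half_disk = half_disk + 500
assert half_disk + more_than_half_disk == availableDisk + 500
assert half_disk + more_than_half_disk > availableDisk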
def __init__(self, config, maxCores, maxMemory, maxDisk):
    if maxCores > self.numCores:
        log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
        maxCores = self.numCores
    if maxMemory > self.physicalMemory:
        log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
        maxMemory = self.physicalMemory
    self.physicalDisk = toil.physicalDisk(config)
    if maxDisk > self.physicalDisk:
        log.warn('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
        maxDisk = self.physicalDisk
    super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
    assert self.maxCores >= self.minCores
    assert self.maxMemory >= 1
    # The scale allows the user to apply a factor to each task's cores requirement, thereby
    # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
    # (scale > 1).
    self.scale = config.scale
    # Number of worker threads that will be started
    self.numWorkers = int(old_div(self.maxCores, self.minCores))
    # A counter to generate job IDs and a lock to guard it
    self.jobIndex = 0
    self.jobIndexLock = Lock()
    # A dictionary mapping IDs of submitted jobs to the command line
    self.jobs = {}
    """
    :type: dict[str,toil.job.JobNode]
    """
    # A queue of jobs waiting to be executed. Consumed by the workers.
    self.inputQueue = Queue()
    # A queue of finished jobs. Produced by the workers.
    self.outputQueue = Queue()
    # A dictionary mapping IDs of currently running jobs to their Info objects
    self.runningJobs = {}
    """
    :type: dict[str,Info]
    """
    # The list of worker threads
    self.workerThreads = []
    """
    :type list[Thread]
    """
    # Variables involved with non-blocking resource acquisition
    self.acquisitionTimeout = 5
    self.acquisitionRetryDelay = 10
    self.aquisitionCondition = Condition()
    # A pool representing available CPU in units of minCores
    self.coreFractions = ResourcePool(self.numWorkers, 'cores', self.acquisitionTimeout)
    # A lock to work around the lack of thread-safety in Python's subprocess module
    self.popenLock = Lock()
    # A pool representing available memory in bytes
    self.memory = ResourcePool(self.maxMemory, 'memory', self.acquisitionTimeout)
    # A pool representing the available space in bytes
    self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)
    log.debug('Setting up the thread pool with %i workers, '
              'given a minimum CPU fraction of %f '
              'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
    for i in range(self.numWorkers):
        worker = Thread(target=self.worker, args=(self.inputQueue,))
        self.workerThreads.append(worker)
        worker.start()
def __init__(self, config, maxCores, maxMemory, maxDisk):
    self.config = config

    # Limit to the smaller of the user-imposed limit and what we actually
    # have on this machine for each resource.
    #
    # If we don't have up to the limit of the resource (and the resource
    # isn't the unlimited sentinel), warn.
    if maxCores > self.numCores:
        if maxCores != sys.maxsize:
            # We have an actually specified limit and not the default
            log.warning('Not enough cores! User limited to %i but we only have %i.',
                        maxCores, self.numCores)
        maxCores = self.numCores
    if maxMemory > self.physicalMemory:
        if maxMemory != sys.maxsize:
            # We have an actually specified limit and not the default
            log.warning('Not enough memory! User limited to %i bytes but we only have %i bytes.',
                        maxMemory, self.physicalMemory)
        maxMemory = self.physicalMemory

    # config.workDir may be None; this sets a real directory
    workdir = Toil.getLocalWorkflowDir(config.workflowID, config.workDir)
    self.physicalDisk = toil.physicalDisk(workdir)
    if maxDisk > self.physicalDisk:
        if maxDisk != sys.maxsize:
            # We have an actually specified limit and not the default
            log.warning('Not enough disk space! User limited to %i bytes but we only have %i bytes.',
                        maxDisk, self.physicalDisk)
        maxDisk = self.physicalDisk

    super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
    assert self.maxCores >= self.minCores
    assert self.maxMemory >= 1

    # The scale allows the user to apply a factor to each task's cores requirement, thereby
    # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
    # (scale > 1).
    self.scale = config.scale

    if config.badWorker > 0 and config.debugWorker:
        # We can't throw SIGUSR1 at the worker because it is also going to
        # be the leader and/or test harness.
        raise RuntimeError("Cannot use badWorker and debugWorker together; "
                           "worker would have to kill the leader")

    self.debugWorker = config.debugWorker

    # A counter to generate job IDs and a lock to guard it
    self.jobIndex = 0
    self.jobIndexLock = Lock()

    # A dictionary mapping IDs of submitted jobs to the command line
    self.jobs: Dict[str, toil.job.JobDescription] = {}

    # A queue of jobs waiting to be executed. Consumed by the daddy thread.
    self.inputQueue = Queue()

    # A queue of finished jobs. Produced by the daddy thread.
    self.outputQueue = Queue()

    # A dictionary mapping IDs of currently running jobs to their Info objects
    self.runningJobs: Dict[str, Info] = {}

    # These next two are only used outside debug-worker mode

    # A dict mapping PIDs to Popen objects for running jobs.
    # Jobs that don't fork are executed one at a time in the main thread.
    self.children: Dict[int, subprocess.Popen] = {}
    # A dict mapping child PIDs to the Job IDs they are supposed to be running.
    self.childToJob: Dict[int, str] = {}

    # A pool representing available CPU in units of minCores
    self.coreFractions = ResourcePool(int(self.maxCores / self.minCores), 'cores')
    # A pool representing available memory in bytes
    self.memory = ResourcePool(self.maxMemory, 'memory')
    # A pool representing the available space in bytes
    self.disk = ResourcePool(self.maxDisk, 'disk')

    # If we can't schedule something, we fill this in with a reason why
    self.schedulingStatusMessage = None

    # We use this event to signal shutdown
    self.shuttingDown = Event()

    # A thread in charge of managing all our child processes.
    # Also takes care of resource accounting.
    self.daddyThread = None

    # If it breaks it will fill this in
    self.daddyException: Optional[Exception] = None

    if self.debugWorker:
        log.debug('Started batch system %s in worker debug mode.', id(self))
    else:
        self.daddyThread = Thread(target=self.daddy, daemon=True)
        self.daddyThread.start()
        log.debug('Started batch system %s in normal mode.', id(self))
def __init__(self, config, maxCores, maxMemory, maxDisk):
    if maxCores > self.numCores:
        log.warn('Limiting maxCores to CPU count of system (%i).', self.numCores)
        maxCores = self.numCores
    if maxMemory > self.physicalMemory:
        log.warn('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
        maxMemory = self.physicalMemory
    self.physicalDisk = toil.physicalDisk(config)
    if maxDisk > self.physicalDisk:
        log.warn('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
        maxDisk = self.physicalDisk
    super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
    assert self.maxCores >= self.minCores
    assert self.maxMemory >= 1
    # The scale allows the user to apply a factor to each task's cores requirement, thereby
    # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
    # (scale > 1).
    self.scale = config.scale
    self.debugWorker = config.debugWorker
    # Number of worker threads that will be started
    self.numWorkers = int(old_div(self.maxCores, self.minCores))
    # A counter to generate job IDs and a lock to guard it
    self.jobIndex = 0
    self.jobIndexLock = Lock()
    # A dictionary mapping IDs of submitted jobs to the command line
    self.jobs = {}
    """
    :type: dict[str,toil.job.JobNode]
    """
    # A queue of jobs waiting to be executed. Consumed by the workers.
    self.inputQueue = Queue()
    # A queue of finished jobs. Produced by the workers.
    self.outputQueue = Queue()
    # A dictionary mapping IDs of currently running jobs to their Info objects
    self.runningJobs = {}
    """
    :type: dict[str,Info]
    """
    # The list of worker threads
    self.workerThreads = []
    """
    :type list[Thread]
    """
    # Variables involved with non-blocking resource acquisition
    self.acquisitionTimeout = 5
    self.acquisitionRetryDelay = 10
    self.aquisitionCondition = Condition()
    # A pool representing available CPU in units of minCores
    self.coreFractions = ResourcePool(self.numWorkers, 'cores', self.acquisitionTimeout)
    # A lock to work around the lack of thread-safety in Python's subprocess module
    self.popenLock = Lock()
    # A pool representing available memory in bytes
    self.memory = ResourcePool(self.maxMemory, 'memory', self.acquisitionTimeout)
    # A pool representing the available space in bytes
    self.disk = ResourcePool(self.maxDisk, 'disk', self.acquisitionTimeout)
    if not self.debugWorker:
        log.debug('Setting up the thread pool with %i workers, '
                  'given a minimum CPU fraction of %f '
                  'and a maximum CPU value of %i.', self.numWorkers, self.minCores, maxCores)
        for i in range(self.numWorkers):
            worker = Thread(target=self.worker, args=(self.inputQueue,))
            self.workerThreads.append(worker)
            worker.start()
    else:
        log.debug('Started in worker debug mode.')
def __init__(self, config, maxCores, maxMemory, maxDisk):
    if maxCores > self.numCores:
        log.warning('Limiting maxCores to CPU count of system (%i).', self.numCores)
        maxCores = self.numCores
    if maxMemory > self.physicalMemory:
        log.warning('Limiting maxMemory to physically available memory (%i).', self.physicalMemory)
        maxMemory = self.physicalMemory
    self.physicalDisk = toil.physicalDisk(config)
    if maxDisk > self.physicalDisk:
        log.warning('Limiting maxDisk to physically available disk (%i).', self.physicalDisk)
        maxDisk = self.physicalDisk
    super(SingleMachineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
    assert self.maxCores >= self.minCores
    assert self.maxMemory >= 1
    # The scale allows the user to apply a factor to each task's cores requirement, thereby
    # squeezing more tasks onto each core (scale < 1) or stretching tasks over more cores
    # (scale > 1).
    self.scale = config.scale
    if config.badWorker > 0 and config.debugWorker:
        # We can't throw SIGUSR1 at the worker because it is also going to
        # be the leader and/or test harness.
        raise RuntimeError("Cannot use badWorker and debugWorker together; "
                           "worker would have to kill the leader")
    self.debugWorker = config.debugWorker
    # A counter to generate job IDs and a lock to guard it
    self.jobIndex = 0
    self.jobIndexLock = Lock()
    # A dictionary mapping IDs of submitted jobs to the command line
    self.jobs = {}
    """
    :type: dict[str,toil.job.JobNode]
    """
    # A queue of jobs waiting to be executed. Consumed by the daddy thread.
    self.inputQueue = Queue()
    # A queue of finished jobs. Produced by the daddy thread.
    self.outputQueue = Queue()
    # A dictionary mapping IDs of currently running jobs to their Info objects
    self.runningJobs = {}
    """
    :type: dict[str,Info]
    """
    # These next two are only used outside debug-worker mode
    # A dict mapping PIDs to Popen objects for running jobs.
    # Jobs that don't fork are executed one at a time in the main thread.
    self.children = {}
    """
    :type: dict[int,subprocess.Popen]
    """
    # A dict mapping child PIDs to the Job IDs they are supposed to be running.
    self.childToJob = {}
    """
    :type: dict[int,str]
    """
    # A pool representing available CPU in units of minCores
    self.coreFractions = ResourcePool(int(old_div(self.maxCores, self.minCores)), 'cores')
    # A pool representing available memory in bytes
    self.memory = ResourcePool(self.maxMemory, 'memory')
    # A pool representing the available space in bytes
    self.disk = ResourcePool(self.maxDisk, 'disk')
    # We use this event to signal shutdown
    self.shuttingDown = Event()
    # A thread in charge of managing all our child processes.
    # Also takes care of resource accounting.
    self.daddyThread = None
    # If it breaks it will fill this in
    self.daddyException = None
    if self.debugWorker:
        log.debug('Started in worker debug mode.')
    else:
        self.daddyThread = Thread(target=self.daddy, daemon=True)
        self.daddyThread.start()
        log.debug('Started in normal mode.')
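# --- Illustrative sketch (not Toil's actual ResourcePool) ---
# All of the constructors above account for cores, memory, and disk through ResourcePool
# objects. Assuming the pool behaves like a counting semaphore over an integer quantity
# (an assumption based on how it is constructed and named here, not on its real
# implementation), the core idea looks roughly like this:
import time
from threading import Condition


class ResourcePool:
    def __init__(self, initialValue, resourceType, timeout=5):
        self.value = initialValue          # how much of the resource is currently free
        self.resourceType = resourceType   # 'cores', 'memory', or 'disk', for messages
        self.timeout = timeout
        self.condition = Condition()

    def acquire(self, amount):
        # Block until `amount` units are free, or give up after the timeout.
        with self.condition:
            deadline = time.monotonic() + self.timeout
            while self.value < amount:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    return False
                self.condition.wait(remaining)
            self.value -= amount
            return True

    def release(self, amount):
        # Return `amount` units and wake any jobs waiting on this resource.
        with self.condition:
            self.value += amount
            self.condition.notify_all()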