def _runParasol(self, command, autoRetry=True):
    """
    Run a parasol command in a subprocess and capture its output.

    The given arguments are appended to ``self.parasolCommand``. Every non-empty line the
    process writes to stderr is logged as a warning.

    :param command: the arguments to pass to parasol.

    :param bool autoRetry: if true, a failing command is re-run after a 10 second pause, for as
           long as it keeps failing. If false, the first failure is handed back to the caller.

    :return: ``(0, lines)`` on success, where ``lines`` is the decoded stdout split on newlines,
             or ``(status, None)`` if the command failed and autoRetry is false.
    """
    argv = list(concat(self.parasolCommand, command))
    while True:
        logger.debug('Running %r', argv)
        child = subprocess.Popen(argv,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 bufsize=-1)
        out, err = child.communicate()
        exitStatus = child.wait()
        # Surface whatever the command complained about, skipping blank lines.
        for errLine in filter(None, err.decode('utf-8').split('\n')):
            logger.warning(errLine)
        if exitStatus == 0:
            return 0, out.decode('utf-8').split('\n')
        failure = 'Command %r failed with exit status %i' % (argv, exitStatus)
        if not autoRetry:
            logger.error(failure)
            return exitStatus, None
        logger.warning(failure)
        logger.warning('Waiting for a 10s, before trying again')
        time.sleep(10)
def _run(cls, command, *args, **kwargs):
    """
    Run a command and wait for it to finish.

    Convenience wrapper combining the behaviour of subprocess.check_call and
    subprocess.check_output.

    :param str command: The command to be run.

    :param str args: Any arguments to be passed to the command.

    :param Any kwargs: keyword arguments for the subprocess.Popen constructor. Two extra keywords
           are consumed here and not forwarded: capture=True requests that the process' stdout
           be returned, and input=... is fed to the process' stdin.

    :rtype: None|str

    :return: The process' stdout if capture=True was passed, None otherwise.

    :raise subprocess.CalledProcessError: if the process exits with a non-zero status.
    """
    argv = list(concat(command, args))
    logger.info('Running %r', argv)
    wantOutput = kwargs.pop('capture', False)
    stdinData = kwargs.pop('input', None)
    if wantOutput:
        kwargs['stdout'] = subprocess.PIPE
    if stdinData is not None:
        kwargs['stdin'] = subprocess.PIPE
    child = subprocess.Popen(argv, **kwargs)
    out, err = child.communicate(input=stdinData)
    # stderr is never redirected by this wrapper, so nothing should have been collected.
    assert err is None
    exitStatus = child.returncode
    if exitStatus != 0:
        raise subprocess.CalledProcessError(exitStatus, argv)
    return out if wantOutput else None
def _run(cls, command: str, *args: str, **kwargs: Any) -> Optional[str]:
    """
    Run a command. Convenience wrapper for subprocess.check_call and subprocess.check_output.

    The process is started in text mode (``universal_newlines=True``), so captured output is
    returned as ``str`` and ``input`` must be a ``str`` as well.

    :param command: The command to be run.

    :param args: Any arguments to be passed to the command.

    :param kwargs: keyword arguments for subprocess.Popen constructor.
           Pass capture=True to have the process' stdout returned.
           Pass input='some string' to feed input to the process' stdin.
           Both of these are consumed here and not forwarded to Popen.

    :return: The output of the process' stdout if capture=True was passed, None otherwise.

    :raise subprocess.CalledProcessError: if the process exits with a non-zero status.
    """
    argl = list(concat(command, args))
    logger.info("Running %r", argl)
    capture = kwargs.pop("capture", False)
    _input = kwargs.pop("input", None)
    if capture:
        kwargs["stdout"] = subprocess.PIPE
    if _input is not None:
        kwargs["stdin"] = subprocess.PIPE
    # Launch the complete argument vector (the command followed by its arguments). Passing only
    # ``args`` here would drop the executable and try to execute the first argument instead.
    popen = subprocess.Popen(argl, universal_newlines=True, **kwargs)
    stdout, stderr = popen.communicate(input=_input)
    # stderr is never redirected by this wrapper, so communicate() must not have collected it.
    assert stderr is None
    if popen.returncode != 0:
        raise subprocess.CalledProcessError(popen.returncode, argl)
    if capture:
        return cast(Optional[str], stdout)
    return None
def testRestart(self):
    """
    Test whether auto-deployment works on restart.

    A user script whose only job always fails (``assert False``) is deployed to the leader and
    run; that run is expected to raise subprocess.CalledProcessError. The script text is then
    patched so the job succeeds (``assert True``), deployed again under the same package path,
    and the workflow is re-run with ``--restart``, which must complete without raising.
    """
    with self._venvApplianceCluster() as (leader, worker):
        # NOTE: this nested function is not called here. It is converted to script text by
        # _getScriptSource() below, and that text is later patched with str.replace(), so the
        # literal 'assert False' in its body must stay exactly as written.
        def userScript():
            from toil.job import Job
            from toil.common import Toil

            # noinspection PyUnusedLocal
            def job(job, disk='10M', cores=1, memory='10M'):
                assert False

            if __name__ == '__main__':
                options = Job.Runner.getDefaultArgumentParser().parse_args()
                with Toil(options) as toil:
                    if toil.config.restart:
                        toil.restart()
                    else:
                        toil.start(Job.wrapJobFn(job))

        # From here on, userScript is the script's text rather than the function object.
        userScript = self._getScriptSource(userScript)

        leader.deployScript(path=self.sitePackages,
                            packagePath='foo.bar',
                            script=userScript)
        pythonArgs = ['venv/bin/python', '-m', 'foo.bar']
        toilArgs = ['--logDebug',
                    '--batchSystem=mesos',
                    '--mesosMaster=localhost:5050',
                    '--defaultMemory=10M',
                    '/data/jobstore']
        command = concat(pythonArgs, toilArgs)
        # The first attempt must fail because the deployed job asserts False.
        self.assertRaises(subprocess.CalledProcessError, leader.runOnAppliance, *command)

        # Deploy an updated version of the script ...
        userScript = userScript.replace('assert False', 'assert True')
        leader.deployScript(path=self.sitePackages,
                            packagePath='foo.bar',
                            script=userScript)
        # ... and restart Toil.
        command = concat(pythonArgs, '--restart', toilArgs)
        leader.runOnAppliance(*command)
def issueBatchJob(self, jobDesc, job_environment: Optional[Dict[str, str]] = None):
    """
    Issues parasol with job commands.

    The job is added to the parasol batch (results file) that is keyed by the job's memory
    requirement, rounded down to a whole MiB, and its core count. A new results file is created
    if no such batch exists yet. Before submitting, this method blocks until enough cores have
    been released by finished jobs for ``self.usedCpus`` to drop to ``self.maxCores`` or below.
    Submission is retried every 5 seconds until parasol's output yields a job ID.

    :param jobDesc: description of the job; its ``memory``, ``cores``, ``disk`` and ``command``
           attributes are read.

    :param job_environment: environment overrides that are passed on to
           ``self.__environment()`` in order to build the ``env`` prefix of the command.

    :return: the integer parasol job ID parsed from parasol's output.

    :raise RuntimeError: if the number of batches has reached ``self.maxBatches``.
    """
    self.checkResourceRequest(jobDesc.memory, jobDesc.cores, jobDesc.disk)

    MiB = 1 << 20
    truncatedMemory = jobDesc.memory // MiB * MiB
    # Look for a batch for jobs with these resource requirements, with
    # the memory rounded down to the nearest megabyte. Rounding down
    # means the new job can't ever decrease the memory requirements
    # of jobs already in the batch.
    # NOTE(review): the limit is checked before the lookup, so it also fires when a matching
    # batch already exists -- confirm that this is intended.
    if len(self.resultsFiles) >= self.maxBatches:
        raise RuntimeError('Number of batches reached limit of %i' % self.maxBatches)
    batchKey = (truncatedMemory, jobDesc.cores)
    try:
        results = self.resultsFiles[batchKey]
    except KeyError:
        results = get_temp_file(rootDir=self.parasolResultsDir)
        self.resultsFiles[batchKey] = results

    # Prefix the command with environment overrides, optionally looking them up from the
    # current environment if the value is None
    command = ' '.join(concat('env', self.__environment(job_environment), jobDesc.command))
    parasolCommand = ['-verbose',
                      '-ram=%i' % jobDesc.memory,
                      '-cpu=%i' % jobDesc.cores,
                      '-results=' + results,
                      'add', 'job', command]

    def releaseCpus(finishedJobID):
        # Give back the cores of a finished job, if we are still accounting for it.
        if finishedJobID in self.jobIDsToCpu:
            self.usedCpus -= self.jobIDsToCpu.pop(finishedJobID)
        assert self.usedCpus >= 0

    # Deal with the cpus
    self.usedCpus += jobDesc.cores
    while True:
        # Process finished results with no wait
        try:
            finishedJobID = self.cpuUsageQueue.get_nowait()
        except Empty:
            break
        releaseCpus(finishedJobID)
    while self.usedCpus > self.maxCores:
        # Still over the limit: block until another job reports that it has finished.
        releaseCpus(self.cpuUsageQueue.get())

    # Now keep going
    while True:
        line = self._runParasol(parasolCommand)[1][0]
        match = self.parasolOutputPattern.match(line)
        if match is None:
            # This is because parasol add job will return success, even if the job was not
            # properly issued!
            logger.debug('We failed to properly add the job, we will try again after a 5s.')
            time.sleep(5)
        else:
            jobID = int(match.group(1))
            self.jobIDsToCpu[jobID] = jobDesc.cores
            self.runningJobs.add(jobID)
            logger.debug("Got the parasol job id: %s from line: %s", jobID, line)
            return jobID
def __enter__(self):
    """
    Launch the appliance container with ``docker run`` and wait for it to come up.

    The container is given the entry point from ``self._entryPoint()``, host networking, the
    name ``self.containerName``, one ``--volume`` flag per entry in ``self.mounts`` and the
    command from ``self._containerCommand()``.

    :return: this object, so that it can be bound by a ``with`` statement.
    """
    # Only the creation of the docker process happens while holding the lock.
    with self.lock:
        image = applianceSelf()
        entryPointFlag = '--entrypoint=' + self._entryPoint()
        nameFlag = '--name=' + self.containerName
        volumeFlags = ['--volume=%s:%s' % (src, dst) for src, dst in self.mounts.items()]
        # Omitting --rm, it's unreliable, see https://github.com/docker/docker/issues/16575
        dockerArgs = list(concat('docker', 'run',
                                 entryPointFlag,
                                 '--net=host',
                                 '-i',
                                 nameFlag,
                                 volumeFlags,
                                 image,
                                 self._containerCommand()))
        logger.info('Running %r', dockerArgs)
        self.popen = subprocess.Popen(dockerArgs)
    self.start()
    self.__wait_running()
    return self
def _resourcePath(self):
    """
    The path to the directory that should be used when shipping this module and its siblings
    around as a resource.

    :return: ``self.dirPath`` if the module comes from a virtualenv or has an undotted name and
             its directory is not a package; for a dotted module name, the root package
             directory below ``self.dirPath``.

    :raise ResourceException: if the module name is undotted but ``self.dirPath`` contains a
           package init module, i.e. the user script was loaded from inside a package directory.
    """
    if self.fromVirtualEnv:
        return self.dirPath
    elif '.' in self.name:
        return os.path.join(self.dirPath, self._rootPackage())
    else:
        initName = self._initModuleName(self.dirPath)
        if initName:
            # The suggested invocation puts the parent of the package directory on PYTHONPATH
            # and runs the script as <package>.<module> with the current interpreter.
            parentDir, packageName = os.path.split(self.dirPath)
            raise ResourceException(
                "Toil does not support loading a user script from a package directory. You "
                "may want to remove %s from %s or invoke the user script as a module via "
                "'PYTHONPATH=\"%s\" %s -m %s.%s'." %
                (initName, self.dirPath, parentDir, exactPython, packageName, self.name))
        return self.dirPath
def _runningOnWorker(self):
    """
    Tell whether the main program of the current process is the Toil worker.

    The file name of the ``__main__`` module is compared against ``worker`` combined with each
    of ``self.moduleExtensions``, and against ``_toil_worker``.

    :return: True if the main module's file name is one of the worker file names, False
             otherwise, including when the main module is missing or has no ``__file__``.
    """
    if '__main__' not in sys.modules:
        log.warning('Cannot determine main program module.')
        return False
    try:
        mainFileName = os.path.basename(sys.modules['__main__'].__file__)
    except AttributeError:
        # If __file__ is not a valid attribute, it's because toil is being run
        # interactively, in which case we can reasonably assume that we are not
        # running on a worker node.
        return False
    workerFileNames = concat(('worker' + extension for extension in self.moduleExtensions),
                             '_toil_worker')  # the setuptools entry point
    return mainFileName in workerFileNames