def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True):
    """
    Submit a job to the Work Queue.

    File names are taken relative to the current working directory; each file
    is registered on the worker under the same relative name, without caching.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of files) A list of locations of the input files.
    @param[in] output_files (list of files) A list of locations of the output files.
    @param[in] tag (string) Tag attached to the task; defaults to the command itself.
    @param[in] tgt (object with a .name attribute) If given, the task ID is recorded
               under WQIDS[tgt.name]; otherwise under WQIDS["None"].
    @param[in] verbose (bool) Log the submission when True.
    """
    global WQIDS
    task = work_queue.Task(command)
    cwd = os.getcwd()
    for f in input_files:
        lf = os.path.join(cwd, f)
        task.specify_input_file(lf, f, cache=False)
    for f in output_files:
        lf = os.path.join(cwd, f)
        task.specify_output_file(lf, f, cache=False)
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    # Identity check: "no tag supplied" must not depend on a custom __eq__.
    if tag is None:
        tag = command
    task.specify_tag(tag)
    taskid = wq.submit(task)
    if verbose:
        # The tag is only mentioned when it differs from the command.
        logger.info("Submitting command '%s' to the Work Queue, %staskid %i\n" % (command, "tag %s, " % tag if tag != command else "", taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True):
    """
    Submit a job to the Work Queue.  This function is a bit fancier in that we can explicitly
    specify where the input files come from, and where the output files go to.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of 2-tuples) A list of local and remote locations of the input files.
    @param[in] output_files (list of 2-tuples) A list of local and remote locations of the output files.
    @param[in] tag (string) Tag attached to the task; defaults to the command itself.
    @param[in] tgt (object with a .name attribute) If given, the task ID is recorded
               under WQIDS[tgt.name]; otherwise under WQIDS["None"].
    @param[in] verbose (bool) Log the submission when True.
    """
    global WQIDS
    task = work_queue.Task(command)
    for f in input_files:
        # f is (local location, remote location)
        task.specify_input_file(f[0], f[1], cache=False)
    for f in output_files:
        # f is (local location, remote location)
        task.specify_output_file(f[0], f[1], cache=False)
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    # Identity check: "no tag supplied" must not depend on a custom __eq__.
    if tag is None:
        tag = command
    task.specify_tag(tag)
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
def add_task(self, cmd, tag, params, files=None, resource=None, value=None):
    """Build a Work Queue task and submit it through this manager.

    Parameters
    ----------
    cmd : str
        The command to run on the remote worker, e.g. ``echo hello`` or
        ``python script.py``. The tag is appended to it as an argument.
    tag : str
        The tag to give this task.
    params
        Parameter values; serialized with ``json.dumps`` and attached to the
        task as a buffer named ``self.param_file``.
    files : list of `WQFile`, `WQBuffer` or 4-tuple, optional
        The input and output files and data buffers that this task will
        send/receive. A tuple is read as ``(path, remotepath, ftype, cache)``
        and turned into a `WQFile`.
    resource : str, optional
        ``'cores'``, ``'feature'`` or any other resource name to request.
    value
        The amount (or feature name) requested for `resource`.

    Notes
    -----
    Besides the caller's files, every task gets an output file
    ``<tag>.<self.out_file>`` placed in ``self.tmpdir``. After submission
    ``self.tasks_submitted`` is refreshed from ``self.stats``.

    See Also
    --------
    `shadho.managers.workqueue.WQFile`
    `shadho.managers.workqueue.WQBuffer`
    `work_queue.Task`
    """
    task = work_queue.Task(cmd + ' ' + tag)
    task.specify_tag(tag)

    # Caller-supplied files first; tuples are promoted to WQFile objects.
    for entry in ([] if files is None else files):
        if isinstance(entry, tuple):
            entry = WQFile(entry[0],
                           remotepath=entry[1],
                           ftype=entry[2],
                           cache=entry[3])
        entry.add_to_task(task)

    # Per-task result file and the serialized parameters.
    local_result = os.path.join(self.tmpdir, tag + '.' + self.out_file)
    result_file = WQFile(local_result,
                         remotepath=self.out_file,
                         ftype='output',
                         cache=False)
    param_buffer = WQBuffer(str(json.dumps(params)),
                            self.param_file,
                            cache=False)
    result_file.add_to_task(task)
    param_buffer.add_to_task(task)

    # Optional resource request.
    if resource is None:
        pass
    elif resource == 'cores':
        task.specify_cores(value)
    elif resource == 'feature':
        task.specify_requirement(value)
    else:
        task.specify_resource(resource, value)

    self.submit(task)
    self.tasks_submitted = self.stats.tasks_submitted
def create_work_queue_task(task_counter, tmpdir, function, input_args, fn_wrapper='mdsim.py'):
    """
    Build (but do not submit) a Work Queue task that evaluates

        output = function(*input_args)

    on a remote worker.

    The function and its arguments are each serialized with dill into a file
    under tmpdir/, named after task_counter. Those two files, together with
    the wrapper script fn_wrapper, are declared as task inputs. The task's
    shell command runs the wrapper with the function file, the argument file
    and the output file name as its arguments, and the output file is declared
    as the task's output.

    The task is tagged with str(task_counter) and requests 1 core, 250 MB of
    memory and 250 MB of disk.
    """
    rendered_args = ','.join(str(arg) for arg in input_args)
    logger.debug("creating task {}: {}({})".format(
        task_counter, function.__name__, rendered_args))

    def in_tmpdir(template):
        # All per-task files live in tmpdir and carry the task counter.
        return os.path.join(tmpdir, template.format(task_counter))

    args_file = in_tmpdir("input_args_{}.p")
    fn_file = in_tmpdir("function_{}.p")
    out_file = in_tmpdir("out_{}.p")

    # Serialize the arguments first, then the function.
    for path, payload in ((args_file, input_args), (fn_file, function)):
        with open(path, "wb") as wf:
            dill.dump(payload, wf)

    # The worker sees only base names, so the command uses those.
    command = "./{wrapper} {fn} {args} {out}".format(
        wrapper=os.path.basename(fn_wrapper),
        fn=os.path.basename(fn_file),
        args=os.path.basename(args_file),
        out=os.path.basename(out_file))

    task = wq.Task(command)
    task.specify_tag(str(task_counter))

    # The wrapper is shared by every task, so it is the only cached file.
    task.specify_input_file(fn_wrapper, cache=True)
    task.specify_input_file(fn_file, cache=False)
    task.specify_input_file(args_file, cache=False)
    task.specify_output_file(out_file, cache=False)

    task.specify_cores(1)
    task.specify_memory(250)  # MB
    task.specify_disk(250)  # MB

    return task
def new_task(self):
    """Create a task that runs the configured executable.

    The executable and every entry of ``self.cfg.getcache`` are attached to
    the task before it is returned.
    """
    program = self.cfg.executable.remotepath
    task = WQ.Task('./' + program)

    # The executable itself, followed by the cached support files.
    self.cfg.executable.add_to_task(task)
    for cached_file in self.cfg.getcache:
        cached_file.add_to_task(task)

    return task
def submit(self, command, inputfiles, outputfiles):
    """Submit a shell command to ``self.wq`` and return its task ID.

    stderr is folded into stdout (``2>&1`` is appended to the command). File
    names are resolved against the current working directory and registered
    under the same relative name on the worker, without caching. The task is
    tagged with the working directory.
    """
    workdir = os.getcwd()
    task = work_queue.Task(command + ' 2>&1')

    # Inputs are registered before outputs, each relative to workdir.
    registrations = (
        (inputfiles, task.specify_input_file),
        (outputfiles, task.specify_output_file),
    )
    for names, register in registrations:
        for name in names:
            register(os.path.join(workdir, name), name, cache=False)

    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_RAND)
    task.specify_tag(workdir)
    task.print_time = 60

    return self.wq.submit(task)
def create_task_sra(s):
    """Build a Work Queue task that downloads an SRA run and runs ericscript.

    Parameters
    ----------
    s : mapping
        Must provide the keys ``'sample_name'``, ``'refid'`` and ``'sra'``.

    Returns
    -------
    The configured (not yet submitted) task, tagged with the sample name.

    Notes
    -----
    Reads the module-level names ``avail_cores``, ``references_local``,
    ``references_remote`` and ``resultsdir``.
    """
    # Define constant values.
    sample = s['sample_name']
    refid = s['refid']
    sra = s['sra']
    get_log = sample + '_get.txt'
    run_log = sample + '_log.txt'
    p1_path = sra + '_1.fastq'
    p2_path = sra + '_2.fastq'

    # Define the shell steps; each step logs to its own file.
    get = 'time fastq-dump --split-files %s > %s 2>&1' % (sra, get_log)
    es_cmd = 'time ericscript -p %d -db %s --refid %s -name %s -o ./%s %s %s > %s 2>&1' \
        % (avail_cores, references_remote, refid, sample, sample, p1_path, p2_path, run_log)
    # NOTE(review): an 'rm -r ${HOME}/ncbi' cleanup step used to be built here
    # but was never part of the command; confirm whether the download cache
    # should be removed on the worker.

    # Create & tag task.
    t = wq.Task('bash -c "' + ' && '.join([get, es_cmd]) + '"')
    t.specify_tag(sample)

    # Specify references folder as a cached input.
    t.specify_directory(references_local, references_remote, wq.WORK_QUEUE_INPUT, recursive=True, cache=True)

    # Specify anticipated outputs.
    # ... logs are written to the task's top-level directory.
    for log_name in (get_log, run_log):
        t.specify_file(os.path.join(resultsdir, log_name), log_name, wq.WORK_QUEUE_OUTPUT, cache=False)
    # ... results are fetched from the <sample>/ directory given to `-o`:
    # filtered results, total results, RData summary.
    for suffix in ('.results.filtered.tsv', '.results.total.tsv', '.Summary.RData'):
        result_name = sample + suffix
        t.specify_file(os.path.join(resultsdir, result_name), os.path.join(sample, result_name),
                       wq.WORK_QUEUE_OUTPUT, cache=False)

    return t
def create_task_irods(s):
    """Build a Work Queue task that fetches paired reads from iRODS and runs ericscript.

    `s` must provide the keys 'sample_name', 'refid', 'p1' and 'p2'. The two
    read files are fetched with `iget`, then ericscript is run on them. The
    returned task is tagged with the sample name and is not submitted here.

    Reads the module-level names avail_cores, references_local,
    references_remote and resultsdir.
    """
    sample, refid = s['sample_name'], s['refid']
    p1_irods, p2_irods = s['p1'], s['p2']

    get_log = '%s_get.txt' % sample
    run_log = '%s_log.txt' % sample
    p1_path, p2_path = os.path.basename(p1_irods), os.path.basename(p2_irods)

    # Shell steps, chained with '&&'. The first fetch truncates the log, the
    # second one appends to it.
    steps = [
        'time iget -TV %s . > %s 2>&1' % (p1_irods, get_log),
        'time iget -TV %s . >> %s 2>&1' % (p2_irods, get_log),
        'time ericscript -p %d -db %s --refid %s -name %s -o ./%s %s %s > %s 2>&1'
        % (avail_cores, references_remote, refid, sample, sample, p1_path, p2_path, run_log),
    ]

    t = wq.Task('bash -c "' + ' && '.join(steps) + '"')
    t.specify_tag(sample)

    # The reference data is a cached, recursively transferred input directory.
    t.specify_directory(references_local, references_remote, wq.WORK_QUEUE_INPUT,
                        recursive=True, cache=True)

    # (name under resultsdir, name on the worker) for every expected output:
    # the two logs, then filtered results, total results and RData summary.
    expected = [(get_log, get_log), (run_log, run_log)]
    for suffix in ('.results.filtered.tsv', '.results.total.tsv', '.Summary.RData'):
        expected.append((sample + suffix, os.path.join(sample, sample + suffix)))

    for local_name, remote_name in expected:
        t.specify_file(os.path.join(resultsdir, local_name), remote_name,
                       wq.WORK_QUEUE_OUTPUT, cache=False)

    return t
def new_task(self):
    """
    Build a fresh task that runs the configured executable.

    The executable and every file listed in the configuration's cache are
    attached to the task, so each task carries the same supporting files.
    See WorkQueue.Config for information on task files.

    Parameters:
      None

    Returns:
      A new cctools WorkQueue.Task instance
    """
    config = self.cfg
    task = WQ.Task('./' + config.executable.remotepath)

    # executable first, then the cached files
    config.executable.add_to_task(task)
    for cached in config.getcache:
        cached.add_to_task(task)

    return task
import work_queue as WQ

# in case we want ${USER} for master name
from os import environ

#### SET MASTER NAME HERE
#### for example: master_name = environ['USER'] + '-my-first-master'
master_name = environ['USER'] + '-my-first-master'

# 1. run at some port at random (port=0 lets Work Queue pick one)
q = WQ.WorkQueue(name=master_name, port=0)

# 2. create a task that runs a command remotely, and ...
t = WQ.Task('./sim.exe A B')

# ...specify the name of input and output files; only the executable is
# marked for caching on the worker.
t.specify_input_file('sim.exe', cache=True)
t.specify_input_file('A')
t.specify_output_file('B')

# 3. submit the task to the queue
q.submit(t)

# 4. wait for all tasks to finish, 5 second timeout:
while not q.empty():
    t = q.wait(5)
    if t:
        # Parenthesized single-argument print runs on Python 2 and Python 3.
        print('task {} finished'.format(t.id))
def work_queue_executor(items, function, accumulator, **kwargs):
    """Execute using Work Queue

    One Work Queue task is submitted per item. Each task runs ``function`` on
    its item inside the given python environment and writes the result to a
    file, which is read back and added to ``accumulator``.

    Parameters
    ----------
        items : list
            List of input arguments. Each item must have a ``filename``
            attribute: unless it contains ``://`` the named file is sent to
            the worker as a cached input.
        function : callable
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        status : bool
            If true (default), enable progress bar
        unit : str
            Label of progress bar unit
        desc : str
            Label of progress bar description
        compression : int, optional
            Compress accumulator outputs in flight with LZ4, at level specified (default 1)
            Set to ``None`` for no compression.
        filepath : str
            Directory in which the temporary working directory is created.
            Defaults to the current directory.

        # work queue specific options:
        environment-file : str
            Python environment to use. Required.
        cores : int
            Number of cores for work queue task. If unset, use a whole worker.
        memory : int
            Amount of memory (in MB) for work queue task. If unset, use a whole worker.
        disk : int
            Amount of disk space (in MB) for work queue task. If unset, use a whole worker.
        resources-mode : one of 'fixed', or 'auto'. Default is 'fixed'.
            'fixed' - allocate cores, memory, and disk specified for each task.
            'auto'  - use cores, memory, and disk as maximum values to allocate.
                      Useful when the resources used by a task are not known, as
                      it lets work queue find an efficient value for maximum
                      throughput.
        debug-log : str
            Filename for debug output
        stats-log : str
            Filename for tasks statistics output
        transactions-log : str
            Filename for tasks lifetime reports output
        master-name : str
            Name to refer to this work queue master.
            Sets port to 0 (any available port) if port not given.
        port : int
            Port number for work queue master program. Defaults to 9123 if
            master-name not given.
        wrapper : str
            Wrapper script to run/open python environment tarball. Defaults to
            python_package_run found in PATH.
        print-stdout : bool
            If true (false is the default), print the standard output of work
            queue task on completion.
        queue-mode : one of 'persistent' or 'one-per-stage'. Default is 'persistent'.
            'persistent' - One queue is used for all stages of processing.
            'one-per-stage' - A new queue is used for each of the stages of processing.
        resource-monitor : bool
            If true, (false is the default) turns on resource monitoring for Work Queue.

    Returns
    -------
        The ``accumulator`` that was passed in, updated in place. If a task
        fails, processing stops and the accumulator holds only the results
        collected up to that point.

    Raises
    ------
        ImportError
            If Work Queue or dill cannot be imported.
        TypeError
            If ``environment-file`` is not given.
        ValueError
            If ``environment-file`` does not exist, or no wrapper could be located.
    """
    try:
        import work_queue as wq
        import tempfile
        import dill
        import os
        from os.path import basename
    except ImportError:
        print('You must have Work Queue and dill installed to use work_queue_executor!')
        raise

    global _wq_queue

    debug_log = kwargs.pop('debug-log', None)
    stats_log = kwargs.pop('stats-log', None)
    trans_log = kwargs.pop('transactions-log', None)

    master_name = kwargs.pop('master-name', None)
    port = kwargs.pop('port', None)
    if port is None:
        # A named master can listen anywhere; an unnamed one uses the
        # well-known default port.
        if master_name:
            port = 0
        else:
            port = 9123

    queue_mode = kwargs.pop('queue-mode', 'persistent')
    if _wq_queue is None or queue_mode == 'one-per-stage':
        _wq_queue = wq.WorkQueue(port, name=master_name, debug_log=debug_log,
                                 stats_log=stats_log, transactions_log=trans_log)

    print('Listening for work queue workers on port {}...'.format(_wq_queue.port))

    unit = kwargs.pop('unit', 'items')
    status = kwargs.pop('status', True)
    desc = kwargs.pop('desc', 'Processing')
    clevel = kwargs.pop('compression', 1)
    filepath = kwargs.pop('filepath', '.')
    output = kwargs.pop('print-stdout', False)

    if clevel is not None:
        function = _compression_wrapper(clevel, function)

    # work queue specific options:
    env_file = kwargs.pop('environment-file', None)
    wrapper = kwargs.pop('wrapper', shutil.which('python_package_run'))

    if not env_file:
        raise TypeError("environment-file argument missing. It should name a conda environment as a tar file.")
    elif not os.path.exists(env_file):
        raise ValueError("environment-file does not name an existing conda environment as a tar file.")

    if not wrapper:
        raise ValueError("Location of python_package_run could not be determined automatically.\nUse 'wrapper' argument to the work_queue_executor.")

    # fixed, or auto
    resources_mode = kwargs.pop('resources-mode', 'fixed')

    cores = kwargs.pop('cores', None)
    memory = kwargs.pop('memory', None)
    disk = kwargs.pop('disk', None)

    resource_monitor = kwargs.pop('resource-monitor', False)

    default_resources = {}
    if cores:
        default_resources['cores'] = cores
    if memory:
        default_resources['memory'] = memory
    if disk:
        default_resources['disk'] = disk

    with tempfile.TemporaryDirectory(prefix="wq-executor-tmp-", dir=filepath) as tmpdir:
        # Pickle function
        infile_function = os.path.join(tmpdir, 'function.p')
        with open(infile_function, 'wb') as wf:
            dill.dump(function, wf)

        # Set up Work Queue
        command_path = _coffea_fn_as_file_wrapper(tmpdir)

        if resource_monitor:
            _wq_queue.enable_monitoring()

        _wq_queue.specify_category_max_resources('default', default_resources)
        if resources_mode == 'auto':
            _wq_queue.tune('category-steady-n-tasks', 3)
            # NOTE(review): this clears the maximum set just above, although
            # the docstring says 'auto' uses the given values as maximums.
            # Confirm which is intended.
            _wq_queue.specify_category_max_resources('default', {})
            _wq_queue.specify_category_mode('default', wq.WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

        # Dictionary to keep track of output file corresponding to task id
        id_output = {}

        # Iterative Executor Specifications
        if len(items) == 0:
            return accumulator

        add_fn = _iadd

        # The wrapper and the environment tarball reach the worker under
        # their base names, so the command must refer to them that way too.
        env_name = basename(env_file)

        for i, item in tqdm(enumerate(items), disable=not status, unit=unit, total=len(items), desc=desc):
            infile_item = os.path.join(tmpdir, 'item_{}.p'.format(i))
            with open(infile_item, 'wb') as wf:
                dill.dump(item, wf)
            outfile = os.path.join(tmpdir, 'output_{}.p'.format(i))

            coffea_command = 'python {} {} {} {}'.format(basename(command_path),
                                                         basename(infile_function),
                                                         basename(infile_item),
                                                         basename(outfile))
            wrapped_command = './{}'.format(basename(wrapper))
            wrapped_command += ' --environment {}'.format(env_name)
            # Unpack into a flat directory inside the sandbox even when
            # env_file was given with leading directories.
            wrapped_command += ' --unpack-to "$WORK_QUEUE_SANDBOX"/{}-env {}'.format(env_name, coffea_command)

            t = wq.Task(wrapped_command)
            t.specify_category('default')

            t.specify_input_file(command_path, cache=True)
            t.specify_input_file(infile_function, cache=False)
            t.specify_input_file(infile_item, cache=False)

            # conda environment files
            t.specify_input_file(env_file, cache=True)
            t.specify_input_file(wrapper, cache=True)

            if re.search('://', item.filename):
                # This looks like an URL. Not transferring file.
                pass
            else:
                t.specify_input_file(item.filename, remote_name=item.filename, cache=True)

            t.specify_output_file(outfile, cache=False)

            task_id = _wq_queue.submit(t)

            # Add pair to dict
            id_output['{}'.format(task_id)] = outfile
            print('Submitted task (id #{}): {}'.format(task_id, wrapped_command))

        print('Waiting for tasks to complete...')
        while not _wq_queue.empty():
            t = _wq_queue.wait(5)
            if t:
                print('Task (id #{}) complete: {} (return code {})'.format(t.id, t.command, t.return_status))
                if output:
                    print('Output:\n{}'.format(t.output))

                print('allocated cores: {}, memory: {} MB, disk: {} MB'.format(
                    t.resources_allocated.cores,
                    t.resources_allocated.memory,
                    t.resources_allocated.disk))

                if resource_monitor:
                    print('measured cores: {}, memory: {} MB, disk {} MB, runtime {}'.format(
                        t.resources_measured.cores,
                        t.resources_measured.memory,
                        t.resources_measured.disk,
                        t.resources_measured.wall_time / 1000000))

                if t.result != 0:
                    # Stop at the first failed task; results gathered so far
                    # stay in the accumulator.
                    print('Task id #{} failed with code: {}'.format(t.id, t.result))
                    print('Stopping execution')
                    break

                # Unpickle output, add to accumulator
                with open(id_output['{}'.format(t.id)], 'rb') as rf:
                    unpickle_output = dill.load(rf)
                add_fn(accumulator, unpickle_output)

        if os.path.exists(command_path):
            os.remove(command_path)

    return accumulator
#!/usr/bin/env python
# Smoke test: queue five `date` tasks, start a local worker, and wait for
# the queue to drain.

import work_queue
import os

work_queue.set_debug_flag('all')

wq = work_queue.WorkQueue(port=work_queue.WORK_QUEUE_RANDOM_PORT, exclusive=False, shutdown=True)
wq.specify_name('test')

for i in range(5):
    task = work_queue.Task('date')
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    task.specify_tag('current date/time [%d]' % i)
    task.specify_input_file('/bin/date')

    # Parenthesized single-argument prints run on Python 2 and Python 3.
    print(task.id)
    print(task.algorithm)
    print(task.command)
    print(task.tag)

    wq.submit(task)

# Make the worker binary from the source tree reachable, then start one
# worker in the background, pointed at the port the queue is listening on.
os.environ['PATH'] = '../../../dttools/src:' + os.environ['PATH']
os.system('work_queue_worker -d all -t 5 localhost %d &' % wq.port)

while not wq.empty():
    print('** wait for task')
    task = wq.wait(1)
    if task:
        print('task')
# Use a private, executable copy of /bin/cat as the test program.
shutil.copyfile('/bin/cat', path.join(test_dir, exec_file))
os.chmod(path.join(test_dir, exec_file), stat.S_IRWXU)

# Port 0: let Work Queue choose the port.
q = wq.WorkQueue(0)

# Record the chosen port in port_file.
with open(port_file, 'w') as f:
    print('Writing port {port} to file {file}'.format(port=q.port, file=port_file))
    f.write(str(q.port))

# simple task
# define a task, sending stderr to console, and stdout to output
# ("2>&1" comes before "> file", so only stdout is redirected to the file)
output = output_file()
t = wq.Task("./{exe} {input} 2>&1 > {output}".format(exe=exec_file, input=input_file, output=output))
t.specify_input_file(path.join(test_dir, exec_file), exec_file)
t.specify_input_file(path.join(test_dir, input_file), input_file)
t.specify_output_file(path.join(test_dir, output), output)

q.submit(t)
t = q.wait(5)
report_task(t, wq.WORK_QUEUE_RESULT_SUCCESS, 0, [path.join(test_dir, output)])

# same simple task, but now we send the directory as an input
# (the command runs inside 'my_dir', so the output is fetched from there)
output = output_file()
t = wq.Task("cd my_dir && ./{exe} {input} 2>&1 > {output}".format(
    exe=exec_file, input=input_file, output=output))
t.specify_directory(test_dir, 'my_dir', recursive=True)
t.specify_output_file(path.join(test_dir, output), path.join('my_dir', output))
PORT = 9199
TASKS = 100
ALPHABET = string.ascii_lowercase + string.digits

if __name__ == '__main__':
    # Commands already recorded in the journal are not queued again.
    # The file is closed as soon as it has been parsed.
    with open('journal.json') as journal_file:
        JOURNAL = json.load(journal_file)

    queue = work_queue.WorkQueue(PORT, name='hulk-amunch', catalog=True)
    queue.specify_log('fury.log')

    # One task per value of -l from 1 to 5.
    for num in range(1, 6):
        command = './hulk.py -l {}'.format(num)
        if command in JOURNAL:
            # Written directly to stderr so it runs on Python 2 and Python 3.
            sys.stderr.write('Already did {}\n'.format(command))
        else:
            task = work_queue.Task(command)
            for source in ('hulk.py', HASHES):
                task.specify_file(source, source, work_queue.WORK_QUEUE_INPUT)
            queue.submit(task)

    # One task per prefix of 1 to 3 characters drawn from ALPHABET.
    for num in range(1, 4):
        for prefix in itertools.product(ALPHABET, repeat=int(num)):
            prefix = ''.join(prefix)
            command = './hulk.py -l 5 -p {}'.format(prefix)
            if command in JOURNAL:
                sys.stderr.write('Already did {}\n'.format(command))
            else:
                task = work_queue.Task(command)
                for source in ('hulk.py', HASHES):
                    task.specify_file(source, source, work_queue.WORK_QUEUE_INPUT)
def sprint(self):
    """Run the main scheduling loop until the task source reports it is done.

    Sets up the task provider and the Work Queue instance (logs, name,
    scheduling algorithm, monitoring, per-category resource settings), then
    repeats the following cycle:

    1. query the source for remaining tasks/units and log them; stop if a
       'KILLED' checkpoint is pending,
    2. obtain new tasks from the source and submit them to the queue,
    3. let the source update itself from the queue and run recurring actions,
    4. collect finished tasks from the queue for up to ``interval`` seconds,
    5. hand the finished tasks back to the source.

    Any exception raised while returning tasks to the source is logged,
    reported by e-mail and re-raised.
    """
    with util.PartiallyMutable.unlock():
        self.source = TaskProvider(self.config)
    action = actions.Actions(self.config, self.source)

    logger.info("using wq from {0}".format(wq.__file__))
    logger.info("running Lobster version {0}".format(util.get_version()))
    logger.info("current PID is {0}".format(os.getpid()))

    # Full Work Queue debug output goes to a file in the work directory;
    # the size passed below is 1 << 29 (512 MiB).
    wq.cctools_debug_flags_set("all")
    wq.cctools_debug_config_file(
        os.path.join(self.config.workdir, "work_queue_debug.log"))
    wq.cctools_debug_config_file_size(1 << 29)

    self.queue = wq.WorkQueue(self.config.advanced.wq_port)
    # New task IDs start above the highest ID the source reports.
    self.queue.specify_min_taskid(self.source.max_taskid() + 1)
    self.queue.specify_log(
        os.path.join(self.config.workdir, "work_queue.log"))
    self.queue.specify_transactions_log(
        os.path.join(self.config.workdir, "transactions.log"))
    self.queue.specify_name("lobster_" + self.config.label)
    self.queue.specify_keepalive_timeout(300)
    # self.queue.tune("short-timeout", 600)
    self.queue.tune("transfer-outlier-factor", 4)
    self.queue.specify_algorithm(wq.WORK_QUEUE_SCHEDULE_RAND)
    if self.config.advanced.full_monitoring:
        self.queue.enable_monitoring_full(None)
    else:
        self.queue.enable_monitoring(None)

    logger.info("starting queue as {0}".format(self.queue.name))

    abort_active = False
    abort_threshold = self.config.advanced.abort_threshold
    abort_multiplier = self.config.advanced.abort_multiplier

    wq_max_retries = self.config.advanced.wq_max_retries

    # A kill request left pending from before this start is marked as a
    # restart.
    if util.checkpoint(self.config.workdir, 'KILLED') == 'PENDING':
        util.register_checkpoint(self.config.workdir, 'KILLED', 'RESTART')

    # time in seconds to wait for WQ to return tasks, with minimum wait
    # time in case no more tasks are waiting
    interval = 120
    interval_minimum = 30

    tasks_left = 0
    units_left = 0
    successful_tasks = 0

    # Names of all categories except 'merge'; used for logging and for the
    # per-category running/queued counts passed to the source.
    categories = []

    self.setup_logging('all')
    # Workflows can be assigned categories, with each category having
    # different cpu/memory/walltime requirements that WQ will automatically
    # fine-tune
    for category in self.config.categories:
        constraints = category.wq()
        if category.name != 'merge':
            categories.append(category.name)
            self.setup_logging(category.name)
        self.queue.specify_category_mode(category.name, category.mode)
        # Fixed mode: constraints are the maximum; otherwise they are only
        # the first allocation guess.
        if category.mode == wq.WORK_QUEUE_ALLOCATION_MODE_FIXED:
            self.queue.specify_category_max_resources(
                category.name, constraints)
        else:
            self.queue.specify_category_first_allocation_guess(
                category.name, constraints)
        logger.debug('Category {0}: {1}'.format(category.name, constraints))
        # Fast abort is only activated for categories without a wall_time
        # constraint.
        if 'wall_time' not in constraints:
            self.queue.activate_fast_abort_category(
                category.name, abort_multiplier)

    proxy_email_sent = False
    while not self.source.done():
        with self.measure('status'):
            tasks_left = self.source.tasks_left()
            units_left = self.source.work_left()

            logger.debug("expecting {0} tasks, still".format(tasks_left))
            self.queue.specify_num_tasks_left(tasks_left)

            for c in categories + ['all']:
                self.log(c, units_left)

            if util.checkpoint(self.config.workdir, 'KILLED') == 'PENDING':
                util.register_checkpoint(self.config.workdir, 'KILLED',
                                         str(datetime.datetime.utcnow()))

                # let the task source shut down gracefully
                logger.info("terminating task source")
                self.source.terminate()
                logger.info("terminating gracefully")
                break

        with self.measure('create'):
            # Per-category counts of running and queued tasks.
            have = {}
            for c in categories:
                cstats = self.queue.stats_category(c)
                have[c] = {
                    'running': cstats.tasks_running,
                    'queued': cstats.tasks_waiting
                }

            stats = self.queue.stats_hierarchy
            tasks = self.source.obtain(stats.total_cores, have)

            expiry = None
            if self.config.advanced.proxy:
                expiry = self.config.advanced.proxy.expires()
                proxy_time_left = self.config.advanced.proxy.time_left()
                # One warning e-mail per drop below 24 hours; the flag is
                # reset once the proxy has 24 hours or more left again.
                if proxy_time_left >= 24 * 3600:
                    proxy_email_sent = False
                if proxy_time_left < 24 * 3600 and not proxy_email_sent:
                    util.sendemail(
                        "Your proxy is about to expire.\n" + "Timeleft: " +
                        str(datetime.timedelta(seconds=proxy_time_left)),
                        self.config)
                    proxy_email_sent = True

            for category, cmd, id, inputs, outputs, env, dir in tasks:
                task = wq.Task(cmd)
                task.specify_category(category)
                task.specify_tag(id)
                task.specify_max_retries(wq_max_retries)
                task.specify_monitor_output(
                    os.path.join(dir, 'resource_monitor'))

                for k, v in env.items():
                    task.specify_environment_variable(k, v)

                for (local, remote, cache) in inputs:
                    cache_opt = wq.WORK_QUEUE_CACHE if cache else wq.WORK_QUEUE_NOCACHE
                    # Only existing files or directories can be sent.
                    if os.path.isfile(local) or os.path.isdir(local):
                        task.specify_input_file(str(local), str(remote),
                                                cache_opt)
                    else:
                        logger.critical(
                            "cannot send file to worker: {0}".format(
                                local))
                        raise NotImplementedError

                for (local, remote) in outputs:
                    task.specify_output_file(str(local), str(remote))

                # presumably expiry is in seconds and the end time in
                # microseconds - TODO confirm
                if expiry:
                    task.specify_end_time(expiry * 10**6)
                self.queue.submit(task)

        with self.measure('status'):
            stats = self.queue.stats_hierarchy
            logger.info(
                "{0} out of {1} workers busy; {2} tasks running, {3} waiting; {4} units left"
                .format(stats.workers_busy,
                        stats.workers_busy + stats.workers_ready,
                        stats.tasks_running, stats.tasks_waiting,
                        units_left))

        with self.measure('update'):
            self.source.update(self.queue)

        # recurring actions are triggered here; plotting etc should run
        # while we have WQ hand us back tasks w/o any database
        # interaction
        with self.measure('action'):
            if action:
                action.take()

        with self.measure('fetch'):
            starttime = time.time()
            task = self.queue.wait(interval)
            tasks = []
            while task:
                if task.return_status == 0:
                    successful_tasks += 1
                elif task.return_status in self.config.advanced.bad_exit_codes:
                    logger.warning(
                        "blacklisting host {0} due to bad exit code from task {1}"
                        .format(task.hostname, task.tag))
                    self.queue.blacklist(task.hostname)
                tasks.append(task)

                # Keep collecting while time remains in this interval and
                # either the minimum wait has not elapsed yet or tasks are
                # still waiting in the queue.
                remaining = int(starttime + interval - time.time())
                if (interval - remaining < interval_minimum
                        or self.queue.stats.tasks_waiting > 0
                    ) and remaining > 0:
                    task = self.queue.wait(remaining)
                else:
                    task = None

        # TODO do we really need this?  We have everything based on
        # categories by now, so this should not be needed.
        if abort_threshold > 0 and successful_tasks >= abort_threshold and not abort_active:
            logger.info(
                "activating fast abort with multiplier: {0}".format(
                    abort_multiplier))
            abort_active = True
            self.queue.activate_fast_abort(abort_multiplier)

        if len(tasks) > 0:
            try:
                with self.measure('return'):
                    self.source.release(tasks)
            except Exception:
                # Log and e-mail the traceback and the affected tasks, then
                # re-raise.
                tb = traceback.format_exc()
                logger.critical(
                    "cannot recover from the following exception:\n" + tb)
                util.sendemail(
                    "Your Lobster project has crashed from the following exception:\n"
                    + tb, self.config)
                for task in tasks:
                    logger.critical(
                        "tried to return task {0} from {1}".format(
                            task.tag, task.hostname))
                raise
    if units_left == 0:
        logger.info("no more work left to do")
        util.sendemail("Your Lobster project is done!", self.config)
        if self.config.elk:
            self.config.elk.end()
        if action:
            action.take(True)
# ts = q.wait(wq.WORK_QUEUE_WAITFORTASK) time.sleep(2) seq = os.path.isdir('/proc/' + str(pid)) if not seq: sys.exit() nfiles = glob.glob(wdir + '/*.cmd') for file in nfiles: command = readCMD(file) removeCMD(file) cmDict = parseCMD(command) t = wq.Task(cmDict['CMD']) t.specify_cores(int(cmDict['wqCMD']['-num_threads'])) t.specify_algorithm(wq.WORK_QUEUE_SCHEDULE_FILES) # t.specify_memory(mem) t.specify_file('/usr/local/bin/'+cmDict['BLAST'], cmDict['BLAST'], \ wq.WORK_QUEUE_INPUT, cache=True) t.specify_file(cmDict['sqCMD']['SEQ'], cmDict['wqCMD']['-query'].strip("'"), \ wq.WORK_QUEUE_INPUT, cache=True) t.specify_file(cmDict['sqCMD']['REP'], cmDict['wqIO']['>'], \ wq.WORK_QUEUE_OUTPUT, cache=True) # t.specify_file(cmDict['sqCMD']['LOG'], cmDict['wqIO']['2>'], \ # wq.WORK_QUEUE_OUTPUT, cache=False)
q.specify_transactions_log('my_transactions.log')

# Parenthesized single-argument print runs on Python 2 and Python 3.
print('WorkQueue on port: {}'.format(q.port))

# enable the measuring of resources
q.enable_monitoring()

# create a category for all tasks
q.specify_category_max_resources('my-tasks', {'cores': 1, 'disk': 500})
q.specify_category_mode('my-tasks', WQ.WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

# create 30 tasks. Each task simply creates a 200MB file, using 10MB of memory
# buffer.
for i in range(0, 30):
    t = WQ.Task('python task.py')
    t.specify_input_file('task.py', cache=True)
    t.specify_category('my-tasks')
    t.specify_max_retries(2)
    q.submit(t)

# create a task that will break the limits set
t = WQ.Task('python task.py 1000')
t.specify_input_file('task.py', cache=True)
t.specify_category('my-tasks')
t.specify_max_retries(2)
q.submit(t)

# wait for all tasks to finish
while not q.empty():
    t = q.wait(60)