def generate_tasks(command, task_inputs, infiles, outfile, tmpdir, max_retries=0):
    """Generate a set of WorkQueue tasks.

    Parameters
    ----------
    command : str
        The shell command to execute on the remote worker.
    task_inputs : list
        List of input data to be sent to each task. Each entry in the list
        will be sent to exactly one task.
    infiles : list of str
        List of task-independent input files. These will be sent along with
        every task and cached on the worker between tasks.
    outfile : str
        Output task file.
    tmpdir : str
        Path to the output file store.
    max_retries : int, optional
        Maximum number of times each task may be retried on failure
        (default 0, i.e. no retries).

    Returns
    -------
    taskmap : dict of str -> work_queue.Task
        The tasks to run mapped to their tag.
    """
    taskmap = dict()
    for i, task_input in enumerate(task_inputs):
        t = Task(command)
        # Tag combines a UUID (global uniqueness) with the zero-padded task
        # index, so tag-prefixed output files sort in submission order.
        t.specify_tag(f'{uuid.uuid4()}_{i:08d}')
        t.specify_max_retries(max_retries)
        # Per-task payload, serialized as JSON; never cached on the worker.
        t.specify_buffer(json.dumps(task_input), remote_name='input.json',
                         flags=WORK_QUEUE_NOCACHE)
        # Task-independent inputs are cached on the worker between tasks.
        for f in infiles:
            t.specify_input_file(f, remote_name=os.path.basename(f),
                                 flags=WORK_QUEUE_CACHE)
        # Each task's output is fetched to tmpdir under a tag-prefixed name.
        t.specify_output_file(os.path.join(tmpdir, '_'.join([t.tag, outfile])),
                              remote_name=outfile, flags=WORK_QUEUE_NOCACHE)
        taskmap[t.tag] = t
    return taskmap
# NOTE(review): this unconditional message prints before any task has been
# submitted — it looks like a stray duplicate of the guarded message further
# down; confirm against the original script before removing.
print('work queue is empty')

outputs = []
for i in range(5):
    ifile = 'msg.%d' % i
    ofile = 'out.%d' % i

    # Each task simply copies its input buffer to its output file.
    task = Task('cat < %s > %s' % (ifile, ofile))
    # NOTE(review): time.time() as a tag may collide if two iterations fall
    # within clock resolution — a counter or uuid would be safer; confirm.
    task.specify_tag(str(time.time()))
    print(task.command, task.tag)

    # Schedule onto workers that already hold this task's files.
    task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES)
    print(task.command, task.algorithm)

    # Input is an in-memory buffer delivered to the worker as ifile.
    task.specify_buffer('hello from %d' % i, ifile, cache=False)

    # Alternate between the two equivalent ways of declaring an output file.
    if i % 2:
        task.specify_output_file(ofile, cache=False)
    else:
        task.specify_file(ofile, type=WORK_QUEUE_OUTPUT, cache=False)
    outputs.append(ofile)

    wq.submit(task)

if wq.empty():
    print('work queue is empty')

# Drain the queue, reporting each completed task's tag; wait() blocks for
# up to 10 seconds per call before returning None.
while not wq.empty():
    t = wq.wait(10)
    if t:
        print(t.tag)