def submit(self, traj):
    """Submit a trajectory to the work queue for further sampling.

    The trajectory is added to the database session, its initial PDB is
    written to disk, a work-queue ``Task`` running the forcefield driver is
    built, ``traj.submit_time`` is stamped and committed, and the task is
    handed to ``self.wq``.

    Parameters
    ----------
    traj : object
        The trajectory record to run. It must not have been submitted yet
        (``submit_time`` is None), must carry an ``init_pdb`` attribute,
        and must have both ``mode`` and ``forcefield`` set.

    Raises
    ------
    ValueError
        If the trajectory was already submitted, has no ``init_pdb``
        attribute, or is missing its ``mode`` or ``forcefield``.
    """
    if traj.submit_time is not None:
        raise ValueError("This traj has already been submitted")

    Session.add(traj)
    Session.flush()
    traj.populate_default_filenames()

    if not hasattr(traj, "init_pdb"):
        raise ValueError("Traj is supposed to have a pdb object tacked on")
    save_file(traj.init_pdb_fn, traj.init_pdb)

    # Validate before the first dereference of traj.forcefield; otherwise a
    # missing forcefield surfaces as an AttributeError instead of the
    # intended ValueError.
    if traj.mode is None or traj.forcefield is None:
        raise ValueError("malformed traj")

    forcefield = traj.forcefield

    # why does traj.forcefield.driver come out as unicode?
    local_driver_fn = str(forcefield.driver)
    remote_driver_fn = os.path.split(local_driver_fn)[1]
    remote_pdb_fn = "input.pdb"
    remote_output_fn = "production_dry{}".format(forcefield.output_extension)

    task = Task(
        "python ./{driver} {pdb_fn} {ff} {water} {mode} {threads}".format(
            pdb_fn=remote_pdb_fn,
            mode=traj.mode,
            driver=remote_driver_fn,
            ff=forcefield.name,
            water=forcefield.water,
            threads=forcefield.threads,
        )
    )

    task.specify_input_file(local_driver_fn, remote_driver_fn)
    task.specify_output_file(traj.wqlog_fn, "logs/driver.log")
    task.specify_input_file(traj.init_pdb_fn, remote_pdb_fn)
    task.specify_output_file(traj.dry_xtc_fn, remote_output_fn)

    if self.return_wet_xtc:
        # this is the XTC file with waters, generated by the driver
        # when you're doing implicit solvent only, this stuff is not used.
        remote_wet_output_fn = "production_wet{}".format(
            forcefield.output_extension)
        task.specify_output_file(traj.wet_xtc_fn, remote_wet_output_fn)
        task.specify_output_file(traj.last_wet_snapshot_fn,
                                 "last_wet_snapshot.pdb")
    else:
        logger.debug(
            "Not requesting production_wet%s from driver (implicit)",
            forcefield.output_extension)

    # The tag is the trajectory id, so a finished task can be matched back
    # to its database row.
    task.specify_tag(str(traj.id))
    task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES)  # what does this do?

    traj.submit_time = datetime.now()

    # need to do a commit from this the qmaster, since this is a different
    # session
    Session.commit()
    self.wq.submit(task)
    logger.info("Submitted to queue: %s", traj)
def generate_tasks(command, task_inputs, infiles, outfile, tmpdir, max_retries=0):
    """Build one WorkQueue task per entry of ``task_inputs``.

    Parameters
    ----------
    command : str
        The shell command to execute on the remote worker.
    task_inputs : list
        Per-task input data. Each entry is JSON-serialized and shipped to
        exactly one task as ``input.json``.
    infiles : list of str
        Task-independent input files. They are attached to every task under
        their base name and flagged as cacheable on the worker.
    outfile : str
        Name of the output file each task produces on the worker.
    tmpdir : str
        Local directory in which the returned output files are stored; each
        one is named ``<tag>_<outfile>``.
    max_retries : int, optional
        Value passed to ``specify_max_retries`` for every task (default 0).

    Returns
    -------
    taskmap : dict of str -> work_queue.Task
        The tasks to run mapped to their tag.
    """
    tasks_by_tag = {}

    for index, payload in enumerate(task_inputs):
        task = Task(command)

        # Tag = random UUID plus the zero-padded position of the input.
        task.specify_tag(f'{uuid.uuid4()}_{index:08d}')
        task.specify_max_retries(max_retries)

        # The per-task payload is unique, so it is not cached.
        task.specify_buffer(
            json.dumps(payload),
            remote_name='input.json',
            flags=WORK_QUEUE_NOCACHE,
        )

        # Shared inputs are sent with every task and cached between tasks.
        for path in infiles:
            task.specify_input_file(
                path,
                remote_name=os.path.basename(path),
                flags=WORK_QUEUE_CACHE,
            )

        tag = task.tag
        local_result = os.path.join(tmpdir, tag + '_' + outfile)
        task.specify_output_file(
            local_result,
            remote_name=outfile,
            flags=WORK_QUEUE_NOCACHE,
        )

        tasks_by_tag[tag] = task

    return tasks_by_tag
def _work_queue_submit_wait(task_queue=multiprocessing.Queue(),
                            launch_cmd=None,
                            env=None,
                            collector_queue=multiprocessing.Queue(),
                            data_dir=".",
                            full=False,
                            shared_fs=False,
                            autolabel=False,
                            autolabel_window=None,
                            autocategory=False,
                            should_stop=None,
                            port=WORK_QUEUE_DEFAULT_PORT,
                            wq_log_dir=None,
                            project_password_file=None,
                            project_name=None):
    """Thread to handle Parsl app submissions to the Work Queue objects.

    Takes in Parsl functions submitted using submit(), and creates a
    Work Queue task with the appropriate specifications, which is then
    submitted to Work Queue. After tasks are completed, processes the
    exit status and exit code of the task, and sends results to the
    Work Queue collector thread.

    To avoid python's global interpreter lock with work queue's wait, this
    function should be launched as a process, not as a lightweight thread.
    This means that any communication should be done using the
    multiprocessing module capabilities, rather than shared memory.

    Parameters
    ----------
    task_queue : multiprocessing.Queue
        Pending Parsl tasks. Each item is read for ``id``, ``category``,
        ``env_pkg``, ``map_file``, ``function_file``, ``result_file``,
        ``input_files`` and ``output_files``. The default queue is created
        once, when the function is defined; callers are expected to pass
        their own.
    launch_cmd : str
        Format template for the remote command, with the fields
        ``package_prefix``, ``mapping``, ``function`` and ``result``.
    env : mapping or None
        Environment variable names mapped to values, set on every task.
    collector_queue : multiprocessing.Queue
        Receives one ``WqTaskToParsl`` per task that finished or could not
        be created/submitted. Same definition-time default caveat as
        ``task_queue``.
    data_dir : str
        Not used in this function body.
    full : bool
        When true (and ``wq_log_dir`` is set), enable full monitoring into
        ``<wq_log_dir>/resource_logs``.
    shared_fs : bool
        When true, the per-task ``input_files`` / ``output_files`` are not
        staged through Work Queue.
    autolabel : bool
        Enable monitoring and put each task category in
        ``WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT`` mode.
    autolabel_window : int or None
        If not None (and ``autolabel`` is set), value used to tune
        ``category-steady-n-tasks``.
    autocategory : bool
        Not used in this function body.
    should_stop : object
        Shared flag whose ``.value`` is polled; the loops exit once it is
        true. Must be provided: the default of None fails on ``.value``.
    port : int
        Port the WorkQueue object listens on.
    wq_log_dir : str or None
        Directory for the debug, master and transaction logs; no logs are
        configured when None.
    project_password_file : str or None
        Password file passed to ``specify_password_file`` when truthy.
    project_name : str or None
        Name passed to ``specify_name`` when truthy.

    Returns
    -------
    int
        Always 0, once the monitoring loop has exited.

    Raises
    ------
    Exception
        Whatever ``WorkQueue(...)`` raises on creation is logged and
        re-raised.
    """
    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Enable debugging flags and create logging file
    wq_debug_log = None
    if wq_log_dir is not None:
        logger.debug("Setting debugging flags and creating logging file")
        wq_debug_log = os.path.join(wq_log_dir, "debug_log")

    # Create WorkQueue queue object
    logger.debug("Creating WorkQueue Object")
    try:
        logger.debug("Listening on port %s", port)
        q = WorkQueue(port, debug_log=wq_debug_log)
    except Exception as e:
        logger.error("Unable to create WorkQueue object: %s", e)
        raise

    # Specify WorkQueue queue attributes
    if project_name:
        q.specify_name(project_name)

    if project_password_file:
        q.specify_password_file(project_password_file)

    if autolabel:
        q.enable_monitoring()
        if autolabel_window is not None:
            q.tune('category-steady-n-tasks', autolabel_window)

    # Only write logs when the wq_log_dir is specified, which it most likely will be
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)
        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    orig_ppid = os.getppid()

    result_file_of_task_id = {}  # Mapping taskid -> result file for active tasks.

    while not should_stop.value:
        # Monitor the task queue. A changed parent pid means the process
        # that launched us is no longer our parent, so stop.
        ppid = os.getppid()
        if ppid != orig_ppid:
            logger.debug("new Process")
            break

        # Submit tasks
        while task_queue.qsize() > 0 and not should_stop.value:
            # Obtain task from task_queue
            try:
                task = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue

            pkg_pfx = ""
            if task.env_pkg is not None:
                pkg_pfx = "./{} -e {} ".format(
                    os.path.basename(package_run_script),
                    os.path.basename(task.env_pkg))

            # Create command string. Only base names are used because the
            # files are staged next to the command on the worker.
            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                package_prefix=pkg_pfx,
                mapping=os.path.basename(task.map_file),
                function=os.path.basename(task.function_file),
                result=os.path.basename(task.result_file))
            logger.debug(command_str)

            # Create WorkQueue task for the command
            logger.debug("Sending task %s with command: %s",
                         task.id, command_str)
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: %s", e)
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be created by work queue",
                        status=-1))
                continue

            t.specify_category(task.category)
            if autolabel:
                q.specify_category_mode(
                    task.category,
                    WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

            # Specify environment variables for the task
            if env is not None:
                for var, value in env.items():
                    t.specify_environment_variable(var, value)

            if task.env_pkg is not None:
                t.specify_input_file(package_run_script, cache=True)
                t.specify_input_file(task.env_pkg, cache=True)

            # Specify script, and data/result files for task
            t.specify_input_file(exec_parsl_function.__file__, cache=True)
            t.specify_input_file(task.function_file, cache=False)
            t.specify_input_file(task.map_file, cache=False)
            t.specify_output_file(task.result_file, cache=False)
            t.specify_tag(str(task.id))

            logger.debug("Parsl ID: %s", task.id)

            # Specify input/output files that need to be staged.
            # Absolute paths are assumed to be in shared filesystem, and thus
            # not staged by work queue.
            if not shared_fs:
                for spec in task.input_files:
                    if spec.stage:
                        t.specify_input_file(spec.parsl_name,
                                             spec.parsl_name,
                                             cache=spec.cache)
                for spec in task.output_files:
                    if spec.stage:
                        t.specify_output_file(spec.parsl_name,
                                              spec.parsl_name,
                                              cache=spec.cache)

            # Submit the task to the WorkQueue object
            logger.debug("Submitting task %s to WorkQueue", task.id)
            try:
                wq_id = q.submit(t)
            except Exception as e:
                logger.error("Unable to submit task to work queue: %s", e)
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be submited to work queue",
                        status=-1))
                continue

            # Record the result file only once the task is really in the
            # queue; a failed submission never comes back from q.wait(), so
            # an entry recorded earlier would never be removed.
            result_file_of_task_id[str(task.id)] = task.result_file
            logger.debug("Task %s submitted to WorkQueue with id %s",
                         task.id, wq_id)

        # If the queue is not empty wait on the WorkQueue queue for a task
        task_found = True
        if not q.empty():
            while task_found and not should_stop.value:
                # Obtain the task from the queue
                t = q.wait(1)
                if t is None:
                    task_found = False
                    continue

                # When a task is found:
                parsl_id = t.tag
                logger.debug("Completed WorkQueue task %s, parsl task %s",
                             t.id, t.tag)
                result_file = result_file_of_task_id.pop(t.tag)

                # A task completes 'successfully' if it has a result file,
                # and it can be loaded. This may mean that the 'success' is
                # an exception.
                logger.debug("Looking for result in %s", result_file)
                try:
                    # NOTE(review): pickle.load can execute arbitrary code
                    # from the file; this presumes result files only come
                    # from trusted workers -- confirm.
                    with open(result_file, "rb") as f_in:
                        result = pickle.load(f_in)
                    logger.debug("Found result in %s", result_file)
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=True,
                                      result=result,
                                      reason=None,
                                      status=t.return_status))
                # If the result file could not be loaded, explain the
                # failure according to work queue error codes and forward
                # the caught exception together with that explanation.
                except Exception as e:
                    reason = _explain_work_queue_result(t)
                    logger.debug("Did not find result in %s", result_file)
                    logger.debug(
                        "Wrapper Script status: %s\nWorkQueue Status: %s",
                        t.return_status, t.result)
                    logger.debug(
                        "Task with id parsl %s / wq %s failed because:\n%s",
                        parsl_id, t.id, reason)
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=False,
                                      result=e,
                                      reason=reason,
                                      status=t.return_status))

    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
outputs = [] for i in range(5): ifile = 'msg.%d' % i ofile = 'out.%d' % i task = Task('cat < %s > %s' % (ifile, ofile)) task.specify_tag(str(time.time())) print task.command, task.tag task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES) print task.command, task.algorithm task.specify_buffer('hello from %d' % i, ifile, cache=False) if i % 2: task.specify_output_file(ofile, cache=False) else: task.specify_file(ofile, type=WORK_QUEUE_OUTPUT, cache=False) outputs.append(ofile) wq.submit(task) if wq.empty(): print 'work queue is empty' while not wq.empty(): t = wq.wait(10) if t: print t.tag print wq.stats.workers_init, wq.stats.workers_ready, wq.stats.workers_busy, \