def workflow_config(name, nodes, cores_per_node=24, interval=30, monitor=False):
    import logging
    import parsl
    from parsl.config import Config
    from parsl.channels import LocalChannel
    from parsl.launchers import SrunLauncher
    from parsl.providers import LocalProvider
    from parsl.addresses import address_by_interface
    from parsl.executors import HighThroughputExecutor
    from parsl.monitoring.monitoring import MonitoringHub

    parsl.set_stream_logger()
    parsl.set_file_logger('script.output', level=logging.DEBUG)

    logging.info('Configuring Parsl Workflow Infrastructure')

    # Read where datasets are...
    env_str = str()
    with open('parsl.env', 'r') as reader:
        env_str = reader.read()

    logging.info(f'Task Environment {env_str}')

    mon_hub = MonitoringHub(
        workflow_name=name,
        hub_address=address_by_interface('ib0'),
        hub_port=60001,
        resource_monitoring_enabled=True,
        monitoring_debug=False,
        resource_monitoring_interval=interval,
    ) if monitor else None

    config = Config(
        executors=[
            HighThroughputExecutor(
                label=name,
                # Optional: the network interface on node 0 that compute nodes can reach,
                # e.g. address=address_by_interface('enp4s0f0') or address_by_interface('ib0')
                address=address_by_interface('ib0'),
                # cores_per_node workers per manager / node
                max_workers=cores_per_node,
                provider=LocalProvider(
                    channel=LocalChannel(script_dir='.'),
                    # make sure nodes_per_block matches the nodes requested in the
                    # submit script in the next step
                    nodes_per_block=nodes,
                    launcher=SrunLauncher(overrides=f'-c {cores_per_node}'),
                    cmd_timeout=120,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init=env_str,
                ),
            )
        ],
        monitoring=mon_hub,
        strategy=None,
    )

    logging.info('Loading Parsl Config')
    parsl.load(config)
    return
def _parsl_initialize(config=None):
    dfk = parsl.load(config)
    return dfk
help="Length of array where each array is s x s", ) parser.add_argument( "--proxy", action="store_true", help="Use proxy store to pass inputs", ) parser.add_argument( "--redis-port", type=int, default=None, help="If not None, use Redis backend", ) args = parser.parse_args() parsl.load() if args.proxy: if args.redis_port is None: store = ps.store.init_store("local") else: store = ps.store.init_store( "redis", hostname="127.0.0.1", port=args.redis_port, ) mapped_results = [] for _ in range(args.num_arrays): x = np.random.rand(args.size, args.size) if args.proxy:
parser.add_argument("--config", default=None, help="Parsl config to parallelize with") args = parser.parse_args() base_dir = '/'.join(os.path.abspath(__file__).split('/')[:-2]) if args.out_dir is None: args.out_dir = os.path.join(base_dir, 'data', 'processed') if args.config is None: args.config = os.path.join(base_dir, 'fusionsimulation', 'configs', 'local.py') spec = importlib.util.spec_from_file_location('', args.config) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) parsl.load(module.config) if args.container_type == 'singularity': image_path = '{base_dir}/docker/fusion-simulation.sif'.format( base_dir=base_dir) # FIXME may require too much memory on some machines if not os.path.isfile(image_path): print('downloading {}'.format(image_path)) subprocess.call( 'singularity build {image_path} docker://olopadelab/fusion-simulation' .format(image_path=image_path), shell=True) gene_id_to_gene_name_map_path = parse_gene_id_to_gene_name_map( args.gencode_annotation)
    print(prev.filepath)

    for key in futs:
        if key > 0:
            fu = futs[key]
            data = open(fu.result().filepath, 'r').read().strip()
            assert data == str(key), "[TEST] incr failed for key: {0} got: {1}".format(key, data)

    cleanup_work(depth)


if __name__ == '__main__':
    parsl.clear()
    dfk = parsl.load(config)

    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--width", default="5",
                        help="Width of the pipeline")
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable debug logging")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()
from parsl.channels import SSHChannel
from parsl.providers.local.local import LocalProvider
import os

remote_config = Config(executors=[
    IPyParallelExecutor(
        label='remote_ipp',
        provider=LocalProvider(
            min_blocks=1,
            init_blocks=1,
            max_blocks=4,
            nodes_per_block=1,
            parallelism=0.5,
            channel=SSHChannel(hostname="localhost"),
            # worker_init='source /phys/groups/tev/scratch3/users/gwatts/anaconda3/etc/profile.d/conda.sh && conda activate parsl',
            worker_init='source /home/gwatts/anaconda3/etc/profile.d/conda.sh '
                        '&& export PYTHONPATH=$PYTHONPATH:{} '
                        '&& conda activate parsl_test'.format(os.getcwd()),
            move_files=False,
        ))
])
parsl.load(remote_config)

# Run this and print out the result
if os.path.isfile("all_hellos.txt"):
    os.unlink("all_hellos.txt")

r = run_cat_test()
with open(r.outputs[0].result(), 'r') as f:
    print(f.read())
print("Result from the test is {}".format(r.result()))
                max_workers=nproc,
                provider=SlurmProvider(
                    channel=LocalChannel(script_dir='parsl_slurm'),
                    launcher=SrunLauncher(),
                    max_blocks=(args.ncpu) + 5,
                    init_blocks=args.ncpu,
                    partition='all',
                    scheduler_options=sched_opts,  # Enter scheduler_options if needed
                    worker_init=wrk_init,  # Enter worker_init if needed
                    walltime='00:120:00'
                ),
            )
        ],
        retries=20,
    )
    dfk = parsl.load(slurm_htex)
else:
    config = Config(executors=[ThreadPoolExecutor(max_threads=args.ncpu)])
    parsl.load(config)

# Write futures
out_dict = {}  # Output filename list
run_futures = {}  # Future list
for key in sorted(sample_dict.keys()):
    run_futures[key] = []

    # Make size batches
    batches = []
    batch_list = []
    batch_size, tot_size = 0, 0
    for i, fname in enumerate(sample_dict[key]):
        batch_size += os.stat(fname).st_size
def main():
    local_threads = Config(
        executors=[ThreadPoolExecutor(max_threads=8, label='local_threads')])

    local_htex = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                worker_debug=True,
                cores_per_worker=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    worker_init=('export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES\n'),
                ),
            )
        ],
        strategy=None,
    )

    parsl.clear()
    # parsl.load(local_threads)
    parsl.load(local_htex)

    @bash_app
    def generate(outputs=[]):
        return "echo $(( RANDOM )) &> {}".format(outputs[0])

    @bash_app
    def concat(inputs=[], outputs=[]):
        return "cat {0} > {1}".format(" ".join(i.filepath for i in inputs), outputs[0])

    @python_app
    def total(inputs=[]):
        total = 0
        with open(inputs[0], 'r') as f:
            for l in f:
                total += int(l)
        return total

    # Create 5 files with semi-random numbers
    print(f'Getting started?')
    output_files = []
    for i in range(5):
        output_files.append(
            generate(outputs=[File(os.path.join(os.getcwd(), 'random-%s.txt' % i))]))

    # Concatenate the files into a single file
    print(f'before concat')
    cc = concat(inputs=[i.outputs[0] for i in output_files],
                outputs=[File(os.path.join(os.getcwd(), 'combined.txt'))])

    # Calculate the sum of the random numbers
    total = total(inputs=[cc.outputs[0]])
    print(total.result())
    max_rtt = max(rtt) * 1000
    avg_rtt = average(rtt) * 1000
    print("App1_RTT | Min:{0:0.3}ms Max:{1:0.3}ms Average:{2:0.3}ms".format(
        min_rtt, max_rtt, avg_rtt))

    rtt = app2_rtts
    min_rtt = min(rtt) * 1000
    max_rtt = max(rtt) * 1000
    avg_rtt = average(rtt) * 1000
    print("App2_RTT | Min:{0:0.3}ms Max:{1:0.3}ms Average:{2:0.3}ms".format(
        min_rtt, max_rtt, avg_rtt))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--count", default="10",
                        help="Count of apps to launch")
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable debug logging")
    args = parser.parse_args()

    # if args.debug:
    #     parsl.set_stream_logger()

    parsl.load(local_config)

    x = test_simple(int(args.count))
        # HighThroughputExecutor(
        #     label="merges",
        #     worker_debug=True,
        #     cores_per_worker=1,
        #     provider=LocalProvider(
        #         channel=LocalChannel(),
        #         init_blocks=1,
        #         max_blocks=10,
        #     ),
        # ),
    ],
    monitoring=monitoring,
    retries=2,
    retry_handler=retry_handler,
)

dfk = parsl.load(slurm_htex)

if args.chunkify:
    print("XXX")
    # os.mkdir("temp")
    for key, fnames in sample_dict.items():
        output, metrics = processor.run_uproot_job(
            {key: fnames},
            treename='Events',
            processor_instance=processor_object,
            executor=processor.parsl_executor,
            executor_args={
                'skipbadfiles': args.skip,
                'savemetrics': True,
                'schema': processor.NanoAODSchema,
                # 'mmap': True,
def _register_workflow(workflow_graph, parsl_config):
    """
    For right now we will keep track of all unique combinations of resource requirements
    and how many of each. The maxBlocks can then be set to the number of each resource
    requirement. In the future we can try to be smarter by examining the workflow graph
    and deciding how many could possibly ever be running concurrently.

    Pipeline is single, Config is single
        * Assign all Apps to null executor
        * @App('python', dfk)  <-- No executor=

    Pipeline is single, Config is multi
        * Assign all Apps to null executor (will be randomly assigned among configured executors)
        * Log warning of mismatch

    Pipeline is multi, Config is single
        * Assign all Apps to null executor
        * Log warning of mismatch

    Pipeline is multi, Config is multi, perfect match
        * Assign all Apps to their appropriate executor

    Pipeline is multi, Config is multi, some match
        * Assign apps that can to their appropriate executors
        * For the remaining, assign to first executor
        * Log warning of mismatch

    Pipeline is multi, Config is multi, no matches
        * Assign all apps to first executor
        * Log warning of mismatch

    :param workflow_graph: nx.DiGraph Directed graph representation of the workflow
    :param parsl_config: parsl.config.Config Parsl configuration to register
    :return: tuple(list, list) App futures and temporary files
    """
    # Register config with Parsl
    parsl.load(parsl_config)
    is_single_parsl_config = len(parsl_config.executors) <= 1

    # Check to see if Pipeline is single or multi
    pipeline_executors = set(Meta._executors.keys())  # Start with anything defined in Meta
    is_single_pipeline_meta = len(pipeline_executors) <= 1

    logger.debug('Pipeline is {}-{}'.format(
        'single' if is_single_pipeline_meta else 'multi',
        'single' if is_single_parsl_config else 'multi',
    ))

    app_factories = dict()

    # At a minimum define the 'all' executor, which is an executor with no specific label
    app_factories['all'] = ParslPipeline._generate_executor_app_factories()

    # If we have multiple executors, define them
    if not any((is_single_parsl_config, is_single_pipeline_meta)):
        for executor in parsl_config.executors:
            app_factories[executor.label] = ParslPipeline._generate_executor_app_factories(
                executor_name=executor.label)

    # Some data containers
    app_futures, data_futures = list(), dict()
    app_nodes_registered = {
        node_id: False
        for node_id in workflow_graph
        if workflow_graph.node[node_id]['type'] == 'app'
    }
    data_node_in_degree = {
        node_id: in_degree
        for node_id, in_degree in workflow_graph.in_degree(workflow_graph.nodes())
        if workflow_graph.node[node_id]['type'] == 'data'
    }

    def register_app(app_node_id, workflow_graph):
        """
        Recursive algorithm to traverse the workflow graph and register apps

        :param app_node_id: str ID of the app node to try to register with parsl
        :param workflow_graph: nx.DiGraph Directed graph representation of the workflow
        :return: list<AppFuture> All app futures generated by the workflow
        """
        # Check if any input data nodes don't have data futures and have in-degree > 0
        for input_dependency_node in workflow_graph.predecessors(app_node_id):
            if workflow_graph.nodes[input_dependency_node]['type'] == 'data':
                if input_dependency_node not in data_futures and data_node_in_degree[input_dependency_node] > 0:
                    register_app(list(workflow_graph.predecessors(input_dependency_node))[0], workflow_graph)
            elif workflow_graph.nodes[input_dependency_node]['type'] == 'app':
                if not app_nodes_registered[input_dependency_node]:
                    register_app(input_dependency_node, workflow_graph)

        # Register this app
        _app_blueprint = workflow_graph.node[app_node_id]['blueprint']
        _app_inputs = [
            data_futures.get(input_data)
            for input_data in _app_blueprint['inputs']
            if data_futures.get(input_data)
        ]

        # If there are any app dependencies, add them
        if _app_blueprint['wait_on']:
            _app_inputs.extend([
                app_nodes_registered.get(wait_on_app_id)
                for wait_on_app_id in _app_blueprint['wait_on']
                if app_nodes_registered.get(wait_on_app_id)
            ])

        # Select executor to run this app on
        executor_assignment = 'all'
        if not any((is_single_parsl_config, is_single_pipeline_meta)):
            # This is a multi-multi run, we might be able to assign to an executor
            # Giving a name defined in Meta takes precedence
            meta_executor = _app_blueprint.get('meta', {}).get('executor')
            # For backward compatibility with 'site' key
            if meta_executor is None and 'site' in _app_blueprint.get('meta', {}):
                meta_executor = _app_blueprint.get('meta', {}).get('site')
            if meta_executor is not None and meta_executor in app_factories:
                executor_assignment = meta_executor
            elif Meta._default_executor is not None and Meta._default_executor in app_factories:
                executor_assignment = Meta._default_executor

        # Create the App future with a specific executor App factory
        if _app_blueprint['type'] == 'bash':
            _app_future = app_factories[executor_assignment][BASH_APP](
                cmd=_app_blueprint['cmd'],
                success_on=_app_blueprint['success_on'],
                inputs=_app_inputs,
                outputs=_app_blueprint['outputs'],
                stdout=_app_blueprint['stdout'],
                stderr=_app_blueprint['stderr']
            )
        else:
            _app_future = app_factories[executor_assignment][PYTHON_APP](
                func_=_app_blueprint['func'],
                func_args=_app_blueprint['args'],
                func_kwargs=_app_blueprint['kwargs'],
                inputs=_app_inputs,
                outputs=_app_blueprint['outputs'],
                stdout=_app_blueprint['stdout'],
                stderr=_app_blueprint['stderr']
            )

        logger.info('{} assigned to executor {}, task id {}'.format(
            _app_blueprint['id'], executor_assignment, _app_future.tid))
        app_futures.append((_app_blueprint['id'], _app_future))

        # Set output data futures
        for data_fut in _app_future.outputs:
            if data_fut.filename not in data_futures:
                data_futures[data_fut.filename] = data_fut

        app_nodes_registered[app_node_id] = _app_future

    # Register all apps
    for app_node in app_nodes_registered:
        if not app_nodes_registered[app_node]:
            register_app(app_node, workflow_graph)

    # Gather files marked as temporary, if any
    tmp_files = [d for d in data_futures if Data(d).tmp]

    return app_futures, tmp_files
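# Illustrative-only sketch (not part of the source) of the executor-assignment
# precedence implemented above: an executor named in the app's Meta (or the
# legacy 'site' key) wins, then the pipeline-wide default executor, then the
# generic 'all' factory. The names below are hypothetical stand-ins for the
# real blueprint and Meta objects.
def pick_executor(blueprint_meta, available, default_executor=None):
    # blueprint_meta: dict like {'executor': 'gpu'} or legacy {'site': 'gpu'}
    # available: set of executor labels with app factories, always containing 'all'
    requested = blueprint_meta.get('executor') or blueprint_meta.get('site')
    if requested in available:
        return requested
    if default_executor in available:
        return default_executor
    return 'all'

# A Meta-specified executor takes precedence over the default; unknown labels fall back.
assert pick_executor({'executor': 'gpu'}, {'all', 'gpu', 'cpu'}, 'cpu') == 'gpu'
assert pick_executor({}, {'all', 'cpu'}, 'cpu') == 'cpu'
assert pick_executor({'site': 'tpu'}, {'all'}) == 'all'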
def local_setup():
    config = fresh_config()
    config.executors[0].poll_period = 1
    config.executors[0].max_workers = 1
    parsl.load(config)
def test():
    import parsl
    from pyscf import lib, gto, scf
    import numpy as np
    import pandas as pd
    import logging

    from parsl.config import Config
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.launchers import SimpleLauncher
    from parsl.executors import ExtremeScaleExecutor

    ncore = 4
    config = Config(
        executors=[
            ExtremeScaleExecutor(label="Extreme_Local",
                                 worker_debug=True,
                                 ranks_per_node=ncore,
                                 provider=LocalProvider(
                                     channel=LocalChannel(),
                                     init_blocks=1,
                                     max_blocks=1,
                                     launcher=SimpleLauncher()))
        ],
        strategy=None,
    )
    parsl.load(config)

    mol = gto.M(atom='H 0. 0. 0.; H 0. 0. 2.0',
                unit='bohr',
                ecp='bfd',
                basis='bfd_vtz')
    mf = scf.RHF(mol).run()
    mol.output = None
    mol.stdout = None
    mf.output = None
    mf.stdout = None
    mf.chkfile = None

    from pyqmc import (ExpCuspFunction, GaussianFunction, MultiplyWF, PySCFSlaterRHF,
                       JastrowSpin, initial_guess, EnergyAccumulator)
    from pyqmc.accumulators import PGradTransform, LinearTransform

    nconf = 1600
    basis = [
        ExpCuspFunction(2.0, 1.5),
        GaussianFunction(0.5),
        GaussianFunction(2.0),
        GaussianFunction(.25),
        GaussianFunction(1.0),
        GaussianFunction(4.0),
        GaussianFunction(8.0)
    ]
    wf = MultiplyWF(PySCFSlaterRHF(mol, mf), JastrowSpin(mol, basis, basis))
    coords = initial_guess(mol, nconf)
    energy_acc = EnergyAccumulator(mol)
    pgrad_acc = PGradTransform(
        energy_acc, LinearTransform(wf.parameters, ['wf2acoeff', 'wf2bcoeff']))

    from pyqmc.optsr import gradient_descent
    gradient_descent(wf,
                     coords,
                     pgrad_acc,
                     vmc=distvmc,
                     vmcoptions={
                         'npartitions': ncore,
                         'nsteps': 100,
                         'nsteps_per': 100
                     })
def parsl_executor(items, function, accumulator, **kwargs):
    """Execute using parsl pyapp wrapper

    Parameters
    ----------
        items : list
            List of input arguments
        function : callable
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        config : parsl.config.Config, optional
            A parsl DataFlow configuration object. Necessary if there is no active kernel

            .. note:: In general, it is safer to construct the DFK with ``parsl.load(config)``
                prior to calling this function
        status : bool
            If true (default), enable progress bar
        unit : str
            Label of progress bar unit
        desc : str
            Label of progress bar description
        compression : int, optional
            Compress accumulator outputs in flight with LZ4, at level specified (default 1)
            Set to ``None`` for no compression.
        tailtimeout : int, optional
            Timeout requirement on job tails. Cancel all remaining jobs if none have
            finished in the timeout window.
    """
    if len(items) == 0:
        return accumulator

    import parsl
    from parsl.app.app import python_app

    from .parsl.timeout import timeout

    status = kwargs.pop('status', True)
    unit = kwargs.pop('unit', 'items')
    desc = kwargs.pop('desc', 'Processing')
    clevel = kwargs.pop('compression', 1)
    tailtimeout = kwargs.pop('tailtimeout', None)
    if clevel is not None:
        function = _compression_wrapper(clevel, function)

    add_fn = _iadd

    cleanup = False
    config = kwargs.pop('config', None)
    try:
        parsl.dfk()
    except RuntimeError:
        cleanup = True
        pass
    if cleanup and config is None:
        raise RuntimeError(
            "No active parsl DataFlowKernel, must specify a config to construct one")
    elif not cleanup and config is not None:
        raise RuntimeError("An active parsl DataFlowKernel already exists")
    elif config is not None:
        parsl.clear()
        parsl.load(config)

    app = timeout(python_app(function))

    futures = set(app(item) for item in items)

    _futures_handler(futures, accumulator, status, unit, desc, add_fn, tailtimeout)

    if cleanup:
        parsl.dfk().cleanup()
        parsl.clear()

    return accumulator
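# Hypothetical usage sketch (not part of the source): as the docstring notes, it is
# safer to construct the DataFlowKernel with parsl.load(config) before calling
# parsl_executor. The work function, item list, and Counter-style accumulator below
# are assumptions for illustration only; they merely need to support the in-place
# "+=" merge used above. Assumes parsl_executor is in scope (same module).
import parsl
from collections import Counter
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor

parsl.load(Config(executors=[ThreadPoolExecutor(max_threads=4)]))

def count_words(line):
    # Returns a Counter, which supports the "+=" used to merge partial results.
    return Counter(line.split())

totals = parsl_executor(
    ["a b a", "b c"],    # items
    count_words,         # function applied to each item
    Counter(),           # accumulator merged with +=
    compression=None,    # skip the LZ4 wrapper for plain Counters
    status=False,        # no progress bar in this sketch
)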
from functools import partial

logger = logging.getLogger("parsl.appworkflow")

parsl.set_stream_logger()
parsl.set_stream_logger(__name__)

logger.info("No-op log message to test log configuration")

# import configuration after setting parsl logging, because interesting
# construction happens during the configuration import
import configuration

parsl.load(configuration.parsl_config)


# given a commandline, wrap it so that we'll invoke
# shifter appropriately (so that we don't need to
# hardcode shifter / singularity command lines all
# over the place).
def shifter_wrapper(img, cmd):
    wrapped_cmd = "shifter --entrypoint --image={} {}".format(img, cmd)
    return wrapped_cmd


def singularity_wrapper(img, inst_cat_root, work_and_out_path, cmd):
    wrapped_cmd = "singularity exec -B {},{},/projects/LSSTsky {} /projects/LSSTsky/Run3.0i/DESC_DC2_imSim_Workflow/docker_run.sh {}".format(
        inst_cat_root, work_and_out_path, img, cmd)
    return wrapped_cmd
lassen_config = Config(
    executors=[lassen_executor],
    strategy=None,
)

distributed_remote_config = Config(
    executors=[local_executor, lassen_executor],
    strategy=None,
)

print(parsl.__version__)
parsl.set_stream_logger()

parsl.clear()
# parsl.load(local_config)
parsl.load(distributed_remote_config)
# parsl.load(m1_htex)
# parsl.load(lassen_config)


# build a string that loads conda correctly
def load_conda():
    return ('CONDA_BASE=$(conda info --base)\n'
            'source ${CONDA_BASE}/etc/profile.d/conda.sh\n'
            'conda deactivate\n'
            'conda activate mirgeDriver.flame1d\n')


# @bash_app(executors=['local_threads'])
@bash_app(executors=['lassen_htex'])
def run(self, debug=False):
    """
    Run trials provided by the optimizer while saving results.
    """
    if debug:
        parsl.set_stream_logger()

    self._dfk = parsl.load(self.parsl_config)
    logger.info(f'Starting ParslRunner with config\n{self}')
    flag = True
    initialize_flag = True
    result = None
    for idx, parameter_configs in enumerate(self.optimizer):
        try:
            logger.info(f'Writing script with configs {parameter_configs}')
            command_script_path, command_script_content = self._writeScript(
                self.command, parameter_configs, 'command')
            if self.experiment.setup_template_string is not None:
                _, setup_script_content = self._writeScript(
                    self.experiment.setup_template_string, parameter_configs, 'setup')
            else:
                setup_script_content = None
            if self.experiment.finish_template_string is not None:
                _, finish_script_content = self._writeScript(
                    self.experiment.finish_template_string, parameter_configs, 'finish')
            else:
                finish_script_content = None

            # set warm-up experiments
            if initialize_flag:
                initialize_flag = False
                logger.info(
                    f'Starting initializing trial with script at {command_script_path}')
                runConfig = paropt.runner.RunConfig(
                    command_script_content=command_script_content,
                    experiment_dict=self.experiment.asdict(),
                    setup_script_content=setup_script_content,
                    finish_script_content=finish_script_content,
                )
                initializing_func_param = {}
                for key, val in self.obj_func_params.items():
                    initializing_func_param[key] = val
                initializing_func_param['timeout'] = 300
                # result = self.obj_func(runConfig, **self.obj_func_params).result()
                result = self.obj_func(runConfig, **initializing_func_param).result()

            logger.info(f'Starting trial with script at {command_script_path}')
            runConfig = paropt.runner.RunConfig(
                command_script_content=command_script_content,
                experiment_dict=self.experiment.asdict(),
                setup_script_content=setup_script_content,
                finish_script_content=finish_script_content,
            )
            result = None
            result = self.obj_func(runConfig, **self.obj_func_params).result()
            self._validateResult(parameter_configs, result)
            trial = Trial(
                outcome=result['obj_output'],
                parameter_configs=parameter_configs,
                run_number=self.run_number,
                experiment_id=self.experiment.id,
                obj_parameters=result['obj_parameters'],
            )
            self.storage.saveResult(self.session, trial)
            self.optimizer.register(trial)
            self.run_result['success'] = True and self.run_result['success']
            flag = flag and self.run_result['success']
            self.run_result['message'][
                f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                    f'Successfully completed trials {idx} for experiment')
        except Exception as e:
            err_traceback = traceback.format_exc()
            if result is not None and result['stdout'] == 'Timeout':
                # for timeCommandLimitTime in lib, timeout
                trial = Trial(
                    outcome=result['obj_output'],
                    parameter_configs=parameter_configs,
                    run_number=self.run_number,
                    experiment_id=self.experiment.id,
                    obj_parameters=result['obj_parameters'],
                )
                self.optimizer.register(trial)
                logger.exception('time out')
                self.storage.saveResult(self.session, trial)
                self.run_result['success'] = False
                self.run_result['message'][
                    f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                        f'Failed to complete trials {idx}:\nError: {e}\n{err_traceback}')
            else:
                trial = Trial(
                    outcome=10000000,
                    parameter_configs=parameter_configs,
                    run_number=self.run_number,
                    experiment_id=self.experiment.id,
                    obj_parameters={},
                )
                self.storage.saveResult(self.session, trial)
                self.run_result['success'] = False
                self.run_result['message'][
                    f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                        f'Failed to complete trials {idx}:\nError: {e}\n{err_traceback}')

    logger.info(f'Finished; Run result: {self.run_result}')
import parsl
from parsl.app.app import python_app, bash_app
from config.midway import midway_htex
from jinja2 import Template, Environment
from jinja2.loaders import FileSystemLoader
import os

parsl.set_stream_logger()
parsl.load(midway_htex)


# Helper function to create an input file from a template
def create_template(template_name, output_name, contents):
    fs_loader = FileSystemLoader('config_files/templates')
    env = Environment(loader=fs_loader)
    template = env.get_template(template_name)
    t_path = os.path.join("config_files", output_name)
    t_file = open(t_path, 'w')
    t_file.write(template.render(contents))
    t_file.close()
    return t_path


#################
# App definitions
#################

@bash_app(executors=['midway_cpu'])
def packmol(input_file=None, inputs=[], outputs=[], stdout=None, stderr=None):
    return "/scratch/midway2/chard/clean/packmol-17.163/packmol < %s" % input_file
#!/mnt/lustre_fs/users/kavotaw/apps/anaconda/anaconda3/envs/parsl_py36/bin/python3
import parsl, os
from config import mcullaghcluster
from library import *
from parsl.data_provider.files import File

parsl.set_stream_logger()
parsl.load(mccluster)

# RIGID DOCKING
ligands = open('list.dat', 'r')
for i in ligands:
    run = vina(inputs=[i], outputs=[rigid_docking_data])
    run.result()

# RIGID ML
names, mols = getNamesMols(inputs=[ligands, rigid_docking_data])
names, features = getAllFeatures(inputs=[names, mols], outputs=[features_file])
train_and_test_svm_and_nn(
    inputs=[rigid_docking_data, features_file],
    outputs=[svm_model, r2_svm, nn_model, r2_nn, rigid_features_and_energies])

# SELECT TOP RESULTS
percent = '10'
select_top_percent(inputs=[percent, rigid_features_and_energies],
                   outputs=[top_ligands])

# FLEXIBLE DOCKING
for i in top_ligands:
    run = adfr(inputs=[i])
    run.result()
import parsl
from config import htex_config
from library import increment

parsl.load(htex_config)

for i in range(5):
    print('{} + 1 = {}'.format(i, increment(i).result()))
def main():
    args = parse_args(sys.argv[1:])

    # Copy V-pipe repo as main working directory
    tmp_dir = join(cwd, 'tmp')
    vpipe_dir = join(tmp_dir, 'vpipe')

    base_params = {
        'container': abspath(args.container),
        'work_dir': tmp_dir,
        'read_length': args.read_length,
        'variant_frequency': args.variant_frequency,
        'read_cutoff_bp': args.read_cutoff_bp,
        'primers_bp': args.primers_bp,
        'depth_cap': float(args.depth_cap),
        'stdout': abspath(join(args.outputs, 'mappgene.stdout')),
    }

    if shutil.which('singularity') is None:
        raise Exception(
            "Missing Singularity executable in PATH.\n\n"
            + "Please ensure Singularity is installed: https://sylabs.io/guides/3.0/user-guide/installation.html")

    if not exists(base_params['container']):
        raise Exception(
            f"Missing container image at {base_params['container']}\n\n"
            + "Either specify another image with --container\n"
            + f"Or build the container with the recipe at: {join(script_dir, 'data/container/recipe.def')}\n"
            + "Or download the container with this command:\n\n"
            + "$ singularity pull image.sif library://avilaherrera1/mappgene/image.sif:latest\n")

    smart_remove(tmp_dir)
    smart_mkdir(tmp_dir)
    run(f'cp -rf /opt/vpipe {vpipe_dir}', base_params)
    smart_copy(join(script_dir, 'data/extra_files'), tmp_dir)
    run(f'cd {vpipe_dir} && sh init_project.sh || true', base_params)
    update_permissions(tmp_dir, base_params)

    if args.test:
        args.inputs = join(script_dir, 'data/example_inputs/*.fastq.gz')

    if isinstance(args.inputs, str):
        args.inputs = glob(args.inputs)

    all_params = {}

    # Copy reads to subject directory
    for input_read in args.inputs:
        pair1 = input_read.replace('_R2', '_R1')
        pair2 = input_read.replace('_R1', '_R2')
        if input_read != pair1 and pair2 not in args.inputs:
            raise Exception(f'Missing paired read: {pair2}')
        if input_read != pair2 and pair1 not in args.inputs:
            raise Exception(f'Missing paired read: {pair1}')

        subject = (basename(input_read).replace('.fastq.gz', '').replace('.fastq', '')
                   .replace('_R1', '').replace('_R2', '').replace('.', '_'))
        subject_dir = abspath(join(args.outputs, subject))

        if subject not in all_params:
            smart_copy(tmp_dir, subject_dir)
            params = base_params.copy()
            params['work_dir'] = subject_dir
            params['input_reads'] = [input_read]
            params['stdout'] = join(subject_dir, 'worker.stdout')
            all_params[subject] = params
        else:
            all_params[subject]['input_reads'].append(input_read)

    if args.slurm:
        executor = parsl.executors.HighThroughputExecutor(
            label="worker",
            address=parsl.addresses.address_by_hostname(),
            provider=parsl.providers.SlurmProvider(
                args.partition,
                launcher=parsl.launchers.SrunLauncher(),
                nodes_per_block=int(args.nnodes),
                init_blocks=1,
                max_blocks=1,
                worker_init=f"export PYTHONPATH=$PYTHONPATH:{os.getcwd()}",
                walltime=args.walltime,
                scheduler_options="#SBATCH --exclusive\n#SBATCH -A {}\n".format(args.bank),
                move_files=False,
            ),
        )
    elif args.flux:
        executor = parsl.executors.FluxExecutor(
            label="worker",
            flux_path="/usr/global/tools/flux/toss_3_x86_64_ib/flux-c0.28.0.pre-s0.17.0.pre/bin/flux",
            provider=parsl.providers.SlurmProvider(
                args.partition,
                launcher=parsl.launchers.SrunLauncher(),
                nodes_per_block=int(args.nnodes),
                init_blocks=1,
                max_blocks=1,
                worker_init=f"export PYTHONPATH=$PYTHONPATH:{os.getcwd()}",
                walltime=args.walltime,
                scheduler_options="#SBATCH --exclusive\n#SBATCH -A {}\n".format(args.bank),
                move_files=False,
            ),
        )
    else:
        executor = parsl.executors.ThreadPoolExecutor(label="worker")

    config = parsl.config.Config(executors=[executor])
    parsl.set_stream_logger()
    parsl.load(config)

    if args.ivar:
        results = []
        for params in all_params.values():
            results.append(run_ivar(params))
        for r in results:
            r.result()
    elif args.vpipe:
        results = []
        for params in all_params.values():
            results.append(run_vpipe(params))
        for r in results:
            r.result()
def local_setup():
    global dfk
    dfk = parsl.load(config)
            max_blocks=30,
            min_blocks=0,
            overrides='module load apps/openmpi/gnu/3.0.0',
            queue='batch',
            channel=SSHChannel(hostname="va-murphy-login.kdi.local",
                               username=os.getenv("USER").split('@')[0],
                               script_dir=os.getenv("HOME") + "/code-va/parsl-workflows/ssh_scripts"),
            launcher=MpiExecLauncher(),
            walltime='4000:00:00'))
    ],
    retries=0)

# local_config = Config(executors=[IPyParallelExecutor(label="local_ipp", provider=LocalProvider(channel=LocalChannel(), init_blocks=23, max_blocks=23))])

dfk = parsl.load(minimap_config)
# parsl.load(local_config)

# workers = ThreadPoolExecutor(max_workers=4)
# dfk = DataFlowKernel(executors=[workers])
# dfk = DataFlowKernel(config=config)
# workers = IPyParallelExecutor()
# dfk = DataFlowKernel(workers)


# submitting plink tool
# plink2 --threads 36 --chr $chr --bfile chr$chr --export vcf id-paste=iid bgz --out chr$chr.plink
@App('bash', executors=['PLINKandEagle'], cache=True)
def plink2(b_inputs=[], chrom=[], outputs=[], stdout=None, stderr=None):
    out_prefix = outputs[0].replace(".vcf.gz", "")
            scheduler_options='',
            # Command to be run before starting a worker, such as:
            worker_init='module load miniconda-3; export PATH=$PATH:{}'.format(MY_USER_PATH),
            cmd_timeout=120,
        ),
    ),
    ThreadPoolExecutor(label='login-node', max_threads=8),
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        monitoring_debug=False,
        resource_monitoring_interval=10,
    ))

parsl.load(parsl_config)


@python_app(executors=['theta-htex'])
def pi(num_points):
    from random import random

    inside = 0
    for i in range(num_points):
        x, y = random(), random()  # Drop a random point in the box.
        if x**2 + y**2 < 1:        # Count points within the circle.
            inside += 1

    return (inside * 4 / num_points)
def setup_module(module):
    parsl.load(config)
import parsl
import pytest

from parsl import App
from parsl.tests.configs.local_threads import config

config['globals']['lazyErrors'] = True
parsl.clear()
parsl.load(config)


@App('python')
def divide(a, b):
    return a / b


@pytest.mark.local
def test_lazy_behavior():
    """Test that lazy error handling works."""
    items = []
    for i in range(0, 1):
        items.append(divide(10, i))

    while True:
        if items[0].done():
            break

    return


if __name__ == "__main__":
def cli_run():
    parser = argparse.ArgumentParser()
    parser.add_argument("--redishost", default="127.0.0.1",
                        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport", default="6379",
                        help="Port on which redis is available")
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Enable debug logging")
    parser.add_argument("-m", "--mac", action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        config = Config(
            executors=[
                ThreadPoolExecutor(label="htex"),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    else:
        config = Config(
            executors=[
                HighThroughputExecutor(
                    label="htex",
                    # Max workers limits the concurrency exposed via mom node
                    max_workers=2,
                    provider=LocalProvider(
                        init_blocks=1,
                        max_blocks=1,
                    ),
                ),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    parsl.load(config)

    print('''This program creates an "MPI Method Server" that listens on an inputs queue
and writes to an output queue:

    input_queue --> mpi_method_server --> queues

To send it a request, add an entry to the inputs queue:
    run "pipeline-pump -p N" where N is an integer request
To access a value, remove it from the output queue:
    run "pipeline-pull" (blocking)
    or "pipeline-pull -t T" (T an integer) to time out after T seconds

TODO: Timeout does not work yet!''')

    # Get the queues for the method server
    method_queues = MethodServerQueues(args.redishost, port=args.redisport)

    # Start the method server
    mms = ParslMethodServer([target_fun],
                            method_queues,
                            default_executors=['htex'])
    mms.run()