def __init__(self,
             channel=LocalChannel(),
             account=None,
             queue=None,
             scheduler_options='',
             worker_init='',
             nodes_per_block=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=100,
             parallelism=1,
             launcher=AprunLauncher(),
             walltime="00:20:00",
             cmd_timeout=120):
    """Initialise the provider under the label ``'torque'``.

    ``channel``, the block-sizing values, ``parallelism``, ``walltime``,
    ``launcher`` and ``cmd_timeout`` are handed to the parent initialiser;
    ``account``, ``queue``, ``scheduler_options`` and ``worker_init`` are
    stored on the instance unchanged.

    NOTE(review): the default ``channel`` and ``launcher`` objects are built
    once, when this function is defined, so every instance that relies on
    the defaults shares the same objects.
    """
    super().__init__('torque',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     launcher,
                     cmd_timeout=cmd_timeout)

    # Scheduler-facing settings supplied by the caller.
    self.worker_init = worker_init
    self.scheduler_options = scheduler_options
    self.queue = queue
    self.account = account

    self.template_string = template_string
    self.provisioned_blocks = 0

    # Job bookkeeping: one entry per job, keyed on job_id.
    self.resources = {}
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             init_blocks=0,
             min_blocks=0,
             max_blocks=1,
             parallelism=1,
             walltime="00:10:00",
             account=None,
             queue=None,
             scheduler_options='',
             worker_init='',
             launcher=AprunLauncher(),
             cmd_timeout=10):
    """Initialise the provider under the label ``'cobalt'``.

    Everything except ``account``, ``queue``, ``scheduler_options`` and
    ``worker_init`` is forwarded by keyword to the parent initialiser; those
    four values are stored on the instance unchanged.

    NOTE(review): the default ``channel`` and ``launcher`` objects are built
    once, when this function is defined, so every instance that relies on
    the defaults shares the same objects.
    """
    parent_kwargs = dict(
        channel=channel,
        nodes_per_block=nodes_per_block,
        init_blocks=init_blocks,
        min_blocks=min_blocks,
        max_blocks=max_blocks,
        parallelism=parallelism,
        walltime=walltime,
        launcher=launcher,
        cmd_timeout=cmd_timeout,
    )
    super().__init__('cobalt', **parent_kwargs)

    # Scheduler-facing settings supplied by the caller.
    self.account = account
    self.queue = queue
    self.scheduler_options = scheduler_options
    self.worker_init = worker_init
def local_setup():
    """Create the ``theta_htex`` configuration and hand it to ``parsl.load``.

    The configuration holds one HighThroughputExecutor backed by a
    CobaltProvider, attaches a MonitoringHub on port 55055 and sets
    ``strategy=None``.
    """
    cobalt = CobaltProvider(
        queue='debug-flat-quad',
        account='CSC249ADCD01',
        launcher=AprunLauncher(overrides="-d 64"),
        worker_init='source activate parsl-issues',
        init_blocks=1,
        max_blocks=1,
        min_blocks=1,
        nodes_per_block=4,
        cmd_timeout=60,
        walltime='00:10:00',
    )
    htex = HighThroughputExecutor(
        label="theta_htex",
        # worker_debug=True,
        cores_per_worker=4,
        provider=cobalt,
    )
    hub = MonitoringHub(
        hub_port=55055,
        logging_level=logging.DEBUG,
        resource_monitoring_interval=10,
    )
    parsl.load(Config(executors=[htex], monitoring=hub, strategy=None))
def fresh_config():
    """Return a newly built configuration with the single ``bw_htex`` executor.

    The worker start-up command is read from
    ``user_opts['bluewaters']['worker_init']``.
    """
    address = address_by_hostname()
    provider = TorqueProvider(
        queue='normal',
        launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),
        # Extra scheduler directive lines to place in the submit script;
        # none are needed here.
        scheduler_options='',
        # Command run before a worker starts, for example
        # 'module load Anaconda; source activate parsl_env'.
        worker_init=user_opts['bluewaters']['worker_init'],
        init_blocks=1,
        max_blocks=1,
        min_blocks=1,
        nodes_per_block=2,
        walltime='00:30:00',
        cmd_timeout=120,
    )
    executor = HighThroughputExecutor(
        label="bw_htex",
        cores_per_worker=1,
        worker_debug=False,
        max_workers=1,
        address=address,
        provider=provider,
    )
    return Config(executors=[executor])
def fresh_config():
    """Return a newly built configuration with one multi-node Theta executor.

    Queue, account and the worker start-up command are read from
    ``user_opts['theta']``.
    """
    address = address_by_hostname()
    provider = CobaltProvider(
        queue=user_opts['theta']['queue'],
        account=user_opts['theta']['account'],
        launcher=AprunLauncher(overrides="-d 64"),
        walltime='00:10:00',
        nodes_per_block=2,
        init_blocks=1,
        max_blocks=1,
        # Extra '#COBALT ...' directive lines for the submit script,
        # e.g. '#COBALT -t 50'; none are needed here.
        scheduler_options='',
        # Command run before a worker starts, for example
        # 'module load Anaconda; source activate parsl_env'.
        worker_init=user_opts['theta']['worker_init'],
        cmd_timeout=120,
    )
    executor = HighThroughputExecutor(
        label='theta_local_htex_multinode',
        max_workers=1,
        address=address,
        provider=provider,
    )
    return Config(executors=[executor])
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able
    to aprun) and ML workers are placed on compute nodes.

    The "qc" executor is a thread pool with one thread per NWChem slot; the
    "ml" executor is a HighThroughputExecutor launched through ``aprun``.

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
            (read once, when this function is defined)
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    Raises:
        AssertionError: If ``total_nodes`` is not a multiple of ``nodes_per_nwchem``
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    # One shell command per line: the environment module must be loaded
    # before the conda environment is activated.
    ml_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
'''

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=nwc_workers,  # Limits the number of manager processes
                    launcher=AprunLauncher(
                        overrides='-d 256 --cc depth -j 4'
                    ),  # Places worker on the compute node
                    worker_init=ml_worker_init,
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
def parsl_config(name: str) -> Tuple[Config, int]:
    """Make the compute resource configuration

    Args:
        name: Name of the desired configuration ('local' or 'theta-debug')
    Returns:
        - Parsl compute configuration
        - Number of compute slots: Includes execution slots and pre-fetch buffers
    Raises:
        ValueError: If ``name`` is not one of the known configurations
    """
    if name == 'local':
        local_executor = HighThroughputExecutor(max_workers=16, prefetch_capacity=1)
        return Config(executors=[local_executor]), 64

    if name == 'theta-debug':
        # One shell command per line: load the module first, then activate
        # the conda environment.
        worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env'''
        debug_executor = HighThroughputExecutor(
            address=address_by_hostname(),
            label="debug",
            max_workers=64,
            prefetch_capacity=64,
            cpu_affinity='block',
            provider=CobaltProvider(
                account='redox_adsp',
                queue='debug-flat-quad',
                nodes_per_block=8,
                scheduler_options='#COBALT --attrs enable_ssh=1',
                walltime='00:60:00',
                init_blocks=0,
                max_blocks=1,
                cmd_timeout=360,
                launcher=AprunLauncher(overrides='-d 64 --cc depth -j 1'),
                worker_init=worker_init,
            ),
        )
        return Config(retries=16, executors=[debug_executor]), 64 * 8 * 4

    raise ValueError(f'Configuration not defined: {name}')
with Pool(args.local_workers) as p: database['invalid'] = p.map(is_smiles_valid, database['smiles']) database.query('not invalid', inplace=True) logging.info(f'Found {len(database)} valid SMILES') # Define the Parsl configuration config = Config( executors=[ HighThroughputExecutor( address=address_by_hostname(), label="htex", max_workers=1, provider=CobaltProvider( queue='CVD_Research', account='CVD_Research', launcher=AprunLauncher(overrides="-d 64 --cc depth"), walltime='3:00:00', nodes_per_block=64, init_blocks=1, min_blocks=1, max_blocks=4, scheduler_options='#COBALT --attrs enable_ssh=1', worker_init=''' module load miniconda-3 source activate /home/lward/exalearn/covid/toxicity-prediction/deepchem/env export KMP_AFFINITY=disabled which python ''', cmd_timeout=120, ), ),
def theta_xtb_config(log_dir: str,
                     xtb_per_node: int = 1,
                     ml_tasks_per_node: int = 1,
                     total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC tasks and ML tasks run on single nodes.

    There are no MPI tasks in this configuration. Both executors request a
    single block spanning ``total_nodes`` nodes; the "qc" block is not
    started up front (``init_blocks=0``) while the "ml" block is.

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        xtb_per_node: Number of XTB calculations (``max_workers`` of the "qc" executor)
        ml_tasks_per_node: Number of ML tasks to place on each node
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
            (read once, when this function is defined)
    Returns:
        (Config) Parsl configuration
    """
    # One shell command per line: load the module first, then activate the
    # conda environment. Shared by both executors.
    worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
'''

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=xtb_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init=worker_init,
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=ml_tasks_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init=worker_init,
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
def theta_nwchem_config(ml_workers: int,
                        log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able
    to aprun) and ML workers are placed on compute nodes.

    Args:
        ml_workers: Number of nodes dedicated to ML tasks
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
            (read once, when this function is defined)
    Returns:
        (Config) Parsl configuration
    Raises:
        AssertionError: If the nodes left for NWChem are not a multiple of
            ``nodes_per_nwchem``
    """
    # Whatever is not reserved for ML is split into NWChem-sized slots.
    nwc_nodes = total_nodes - ml_workers
    assert nwc_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = nwc_nodes // nodes_per_nwchem

    # One shell command per line: load the module first, then activate the
    # conda environment. Shared by both executors.
    worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
'''

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init=worker_init,
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                provider=LocalProvider(
                    nodes_per_block=ml_workers,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init=worker_init,
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy=None,
    )
def theta_persistent(log_dir: str,
                     nodes_per_nwchem: int = 1,
                     qc_nodes: int = 8,
                     ml_nodes: int = 8,
                     ml_prefetch: int = 0) -> Config:
    """Configuration where the application is persistent and sits on the Theta login node.

    Nodes will be requested from Cobalt using separate jobs for ML and QC tasks.

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        qc_nodes: Number of nodes dedicated to QC tasks
        ml_nodes: Number of nodes per block requested for the ML executor
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    # Shell set-up for QC workers, one command per line. The inline shell
    # comment must end at its own line break, otherwise it would swallow
    # every command that follows it.
    qc_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
export OMP_NUM_THREADS=64
export KMP_INIT_AT_FORK=FALSE
export PYTHONPATH=$PYTHONPATH:$(pwd)
export PATH="/lus/theta-fs0/projects/CSC249ADCD08/software/nwchem-6.8.1/bin/LINUX64:$PATH"
mkdir -p scratch  # For the NWChem tasks
pwd
which nwchem
hostname
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64_lin/:/opt/intel/compilers_and_libraries_2020.0.166/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
'''

    # Shell set-up for ML workers, one command per line.
    ml_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env'''

    return Config(
        retries=8,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=qc_nodes // nodes_per_nwchem,
                # NOTE(review): the QC executor is also given ml_prefetch,
                # which the docstring describes as an ML setting - confirm
                # this is intended.
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    # The debug queue is named only for requests of at most
                    # 8 nodes; larger requests pass no queue name.
                    queue='debug-cache-quad' if qc_nodes <= 8 else None,
                    walltime='00:60:00',
                    nodes_per_block=qc_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    cmd_timeout=360,
                    worker_init=qc_worker_init,
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-flat-quad',
                    nodes_per_block=ml_nodes,
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    walltime='00:60:00',
                    init_blocks=0,
                    max_blocks=1,
                    cmd_timeout=360,
                    launcher=AprunLauncher(
                        overrides='-d 256 --cc depth -j 4'
                    ),  # Places worker on the compute node
                    worker_init=ml_worker_init,
                ),
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
], strategy=None ) # Configuration to run on Theta using single-app applications theta_config = Config( executors=[ HighThroughputExecutor( address=address_by_hostname(), label="htex", max_workers=8, provider=LocalProvider( nodes_per_block=os.environ.get("COBALT_JOBSIZE", 1), init_blocks=1, max_blocks=1, launcher=AprunLauncher(overrides='-d 64 --cc depth'), worker_init=''' module load miniconda-3 export PATH=~/software/psi4/bin:$PATH conda activate /lus/theta-fs0/projects/CSC249ADCD08/colmena/env export KMP_INIT_AT_FORK=FALSE export OMP_NUM_THREADS=8 ''', ), ), ThreadPoolExecutor(label="local_threads", max_threads=4) ], strategy=None, ) theta_nwchem_config = Config(
aprun_overrides = """-cc depth -j 1 -d 64""" theta_executor = HighThroughputExecutor( label='worker-nodes', address=address_by_hostname(), worker_debug=True, suppress_failure=False, poll_period=5000, cores_per_worker=256, heartbeat_period=300, heartbeat_threshold=1200, provider=LocalProvider(nodes_per_block=8, init_blocks=1, min_blocks=1, max_blocks=1, launcher=AprunLauncher(overrides=aprun_overrides), walltime=WALLTIME), ) cori_in_salloc_executor = HighThroughputExecutor( label='worker-nodes', address=address_by_hostname(), worker_debug=True, suppress_failure=True, poll_period=5000, cores_per_worker=272, heartbeat_period=300, heartbeat_threshold=1200, provider=LocalProvider(nodes_per_block=299, init_blocks=1, min_blocks=1,
strategy=None, ) config = Config( retries=1, usage_tracking=True, executors=[ HighThroughputExecutor( address=address_by_hostname(), label="htex", max_workers=4, prefetch_capacity=1, provider=CobaltProvider( queue='CVD_Research', account='CVD_Research', launcher=AprunLauncher(overrides="-d 256 --cc depth -j 4"), walltime='3:00:00', nodes_per_block=4, init_blocks=1, min_blocks=1, max_blocks=4, scheduler_options='#COBALT --attrs enable_ssh=1', worker_init=f''' module load miniconda-3 module load java source activate /home/lward/exalearn/covid/toxicity-prediction/opera/env export KMP_AFFINITY=disabled which python # Set the environment variables {envs}
HighThroughputExecutor( label='theta_local_htex_multinode', max_workers= 32, # The target process itself if a Multiprocessing application. We do not # need to overload the compute node with parsl workers. address="10.236.1.195", # address=address_by_hostname(), # launch_cmd=launch_cmd, prefetch_capacity=2, provider=CobaltProvider( #queue='debug-flat-quad', #queue='default', queue='CVD_Research', #account='candle_aesp', account='CVD_Research', launcher=AprunLauncher(overrides=" -d 64"), walltime='02:00:00', nodes_per_block=25, init_blocks=1, min_blocks=1, max_blocks=1, # string to prepend to #COBALT blocks in the submit # script to the scheduler eg: '#COBALT -t 50' # scheduler_options='', # Command to be run before starting a worker, such as: # 'module load Anaconda; source activate parsl_env'. # worker_init='source ~/Anaconda/bin/activate; conda activate candle_py3.7;', worker_init= 'source ~/anaconda3/bin/activate; conda activate candle_py3.7;', #worker_init=("bash /projects/candle_aesp/yadu/unpack_and_load.sh tmp candle_py3.7 /projects/candle_aesp/yadu/candle_py3.7.tar.gz /dev/shm/ ;" # "source /dev/shm/candle_py3.7/bin/activate ;"
from parsl.config import Config
from parsl.executors import HighThroughputExecutor

# Developers running the tests: update parsl/tests/configs/user_opts.py first.
# Users borrowing this file as an example should either
#   1) provide their own local `user_opts.py`, or
#   2) drop the `user_opts` import below and substitute a literal value for
#      every `user_opts[...]` lookup
#      (i.e., user_opts['swan']['username'] -> 'your_username')
from .user_opts import user_opts

# Single executor named 'swan_htex'; host credentials, scheduler options and
# the worker start-up command all come from user_opts['swan'].
config = Config(
    executors=[
        HighThroughputExecutor(
            label='swan_htex',
            provider=TorqueProvider(
                channel=SSHChannel(
                    hostname='swan.cray.com',
                    username=user_opts['swan']['username'],
                    script_dir=user_opts['swan']['script_dir'],
                ),
                nodes_per_block=1,
                init_blocks=1,
                max_blocks=1,
                launcher=AprunLauncher(),
                scheduler_options=user_opts['swan']['scheduler_options'],
                worker_init=user_opts['swan']['worker_init'],
            ),
        )
    ]
)
from parsl.launchers import AprunLauncher
from parsl.providers import TorqueProvider
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.addresses import address_by_hostname
from parsl.data_provider.scheme import GlobusScheme

# Single executor named 'bluewaters_htex'. "<LOGIN_NODE>" is a placeholder
# left as-is for the address workers should connect back to.
# NOTE(review): LocalChannel is used below but is not imported in the lines
# shown here - confirm it is brought into scope elsewhere.
config = Config(
    executors=[
        HighThroughputExecutor(
            label="bluewaters_htex",
            worker_debug=True,
            address="<LOGIN_NODE>",
            provider=TorqueProvider(
                channel=LocalChannel(),
                init_blocks=1,
                max_blocks=1,
                min_blocks=1,
                nodes_per_block=1,
                launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),
                # Each '#PBS' directive has to sit on its own line of the
                # submit script.
                scheduler_options='''#PBS -l nodes=1:ppn=32
#PBS -q debug''',
                worker_init='''module load bwpy''',
                walltime='00:30:00',
            ),
            storage_access=[
                GlobusScheme(
                    endpoint_uuid="d59900ef-6d04-11e5-ba46-22000b92c6ec",
                    endpoint_path="/",
                    local_path="/",
                )
            ],
        )
    ],
)