Example #1
    def __init__(self,
                 channel=LocalChannel(),
                 account=None,
                 queue=None,
                 scheduler_options='',
                 worker_init='',
                 nodes_per_block=1,
                 init_blocks=1,
                 min_blocks=0,
                 max_blocks=100,
                 parallelism=1,
                 launcher=AprunLauncher(),
                 walltime="00:20:00",
                 cmd_timeout=120):
        label = 'torque'
        super().__init__(label,
                         channel,
                         nodes_per_block,
                         init_blocks,
                         min_blocks,
                         max_blocks,
                         parallelism,
                         walltime,
                         launcher,
                         cmd_timeout=cmd_timeout)

        self.account = account
        self.queue = queue
        self.scheduler_options = scheduler_options
        self.worker_init = worker_init
        self.provisioned_blocks = 0
        # template_string is defined at module level in the Parsl source
        # (from parsl.providers.torque.template import template_string)
        self.template_string = template_string

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources = {}
Example #2
    def __init__(self,
                 channel=LocalChannel(),
                 nodes_per_block=1,
                 init_blocks=0,
                 min_blocks=0,
                 max_blocks=1,
                 parallelism=1,
                 walltime="00:10:00",
                 account=None,
                 queue=None,
                 scheduler_options='',
                 worker_init='',
                 launcher=AprunLauncher(),
                 cmd_timeout=10):
        label = 'cobalt'
        super().__init__(label,
                         channel=channel,
                         nodes_per_block=nodes_per_block,
                         init_blocks=init_blocks,
                         min_blocks=min_blocks,
                         max_blocks=max_blocks,
                         parallelism=parallelism,
                         walltime=walltime,
                         launcher=launcher,
                         cmd_timeout=cmd_timeout)

        self.account = account
        self.queue = queue
        self.scheduler_options = scheduler_options
        self.worker_init = worker_init
Example #3
def local_setup():
    threads_config = Config(
        executors=[
            HighThroughputExecutor(
                label="theta_htex",
                # worker_debug=True,
                cores_per_worker=4,
                provider=CobaltProvider(
                    queue='debug-flat-quad',
                    account='CSC249ADCD01',
                    launcher=AprunLauncher(overrides="-d 64"),
                    worker_init='source activate parsl-issues',
                    init_blocks=1,
                    max_blocks=1,
                    min_blocks=1,
                    nodes_per_block=4,
                    cmd_timeout=60,
                    walltime='00:10:00',
                ),
            )
        ],
        monitoring=MonitoringHub(hub_port=55055,
                                 logging_level=logging.DEBUG,
                                 resource_monitoring_interval=10),
        strategy=None)
    parsl.load(threads_config)
Example #4
def fresh_config():
    config = Config(
        executors=[
            HighThroughputExecutor(
                label="bw_htex",
                cores_per_worker=1,
                worker_debug=False,
                max_workers=1,
                address=address_by_hostname(),
                provider=TorqueProvider(
                    queue='normal',
                    launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),
                    # string to prepend as #PBS directives to the submit
                    # script to the scheduler, e.g. '#PBS -l nodes=1:ppn=32'
                    scheduler_options='',
                    # Command to be run before starting a worker, such as:
                    # 'module load Anaconda; source activate parsl_env'.
                    worker_init=user_opts['bluewaters']['worker_init'],
                    init_blocks=1,
                    max_blocks=1,
                    min_blocks=1,
                    nodes_per_block=2,
                    walltime='00:30:00',
                    cmd_timeout=120,
                ),
            )
        ], )
    return config
Example #5
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label='theta_local_htex_multinode',
                max_workers=1,
                address=address_by_hostname(),
                provider=CobaltProvider(
                    queue=user_opts['theta']['queue'],
                    account=user_opts['theta']['account'],
                    launcher=AprunLauncher(overrides="-d 64"),
                    walltime='00:10:00',
                    nodes_per_block=2,
                    init_blocks=1,
                    max_blocks=1,
                    # string to prepend to #COBALT blocks in the submit
                    # script to the scheduler eg: '#COBALT -t 50'
                    scheduler_options='',
                    # Command to be run before starting a worker, such as:
                    # 'module load Anaconda; source activate parsl_env'.
                    worker_init=user_opts['theta']['worker_init'],
                    cmd_timeout=120,
                ),
            )
        ], )
Example #6
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(
                            os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(
                        overrides='-d 256 --cc depth -j 4'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
    ''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=
            f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
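
A brief usage sketch for this factory, assuming it runs inside a Cobalt allocation so that COBALT_JOBSIZE is set; the log directory name is illustrative, not part of the original code.

import parsl

# Hypothetical usage: build a config for 2-node NWChem tasks and load it.
# Must run inside a Cobalt job so COBALT_JOBSIZE yields the real node count.
config = theta_nwchem_config(log_dir='parsl_logs', nodes_per_nwchem=2, ml_prefetch=4)
parsl.load(config)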
Example #7
def parsl_config(name: str) -> Tuple[Config, int]:
    """Make the compute resource configuration

    Args:
        name: Name of the desired configuration
    Returns:
        - Parsl compute configuration
        - Number of compute slots: Includes execution slots and pre-fetch buffers
    """

    if name == 'local':
        return Config(
            executors=[
                HighThroughputExecutor(max_workers=16, prefetch_capacity=1)
            ]
        ), 64
    elif name == 'theta-debug':
        return Config(
            retries=16,
            executors=[HighThroughputExecutor(
                    address=address_by_hostname(),
                    label="debug",
                    max_workers=64,
                    prefetch_capacity=64,
                    cpu_affinity='block',
                    provider=CobaltProvider(
                        account='redox_adsp',
                        queue='debug-flat-quad',
                        nodes_per_block=8,
                        scheduler_options='#COBALT --attrs enable_ssh=1',
                        walltime='00:60:00',
                        init_blocks=0,
                        max_blocks=1,
                        cmd_timeout=360,
                        launcher=AprunLauncher(overrides='-d 64 --cc depth -j 1'),
                        worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
                    ),
                )]
            ), 64 * 8 * 4
    else:
        raise ValueError(f'Configuration not defined: {name}')
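
A minimal sketch of how the returned tuple might be consumed; the slot count bounds the number of in-flight tasks, and the variable names are illustrative.

import parsl

config, n_slots = parsl_config('theta-debug')
parsl.load(config)
# n_slots counts execution slots plus prefetch buffers; a driver loop can use it
# to keep roughly that many tasks submitted at any time.
print(f'Keeping up to {n_slots} tasks in flight')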
Example #8
    with Pool(args.local_workers) as p:
        database['invalid'] = p.map(is_smiles_valid, database['smiles'])
    database.query('not invalid', inplace=True)
    logging.info(f'Found {len(database)} valid SMILES')

    # Define the Parsl configuration
    config = Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="htex",
                max_workers=1,
                provider=CobaltProvider(
                    queue='CVD_Research',
                    account='CVD_Research',
                    launcher=AprunLauncher(overrides="-d 64 --cc depth"),
                    walltime='3:00:00',
                    nodes_per_block=64,
                    init_blocks=1,
                    min_blocks=1,
                    max_blocks=4,
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    worker_init='''
module load miniconda-3
source activate /home/lward/exalearn/covid/toxicity-prediction/deepchem/env
export KMP_AFFINITY=disabled
which python
                    ''',
                    cmd_timeout=120,
                ),
            ),
Example #9
def theta_xtb_config(log_dir: str,
                     xtb_per_node: int = 1,
                     ml_tasks_per_node: int = 1,
                     total_nodes: int = int(os.environ.get(
                         "COBALT_JOBSIZE", 1))):
    """Theta configuration where QC tasks and ML tasks run on single nodes.

    There are no MPI tasks in this configuration.

    Args:
        xtb_per_node: Number of XTB calculations to place on each node
        ml_tasks_per_node: Number of ML tasks to place on each node
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=xtb_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=ml_tasks_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=
            f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
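
Because this configuration defines separate 'qc' and 'ml' executors, apps can be pinned to one of them via the executors argument of the app decorator. A minimal sketch with placeholder task bodies; the log directory name is illustrative:

import parsl
from parsl import python_app

parsl.load(theta_xtb_config(log_dir='parsl_logs'))

@python_app(executors=['qc'])
def run_xtb(smiles):
    ...  # placeholder for an XTB calculation

@python_app(executors=['ml'])
def score(smiles):
    ...  # placeholder for an ML inference task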
Example #10
def theta_nwchem_config(ml_workers: int, log_dir: str, nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        ml_workers: Number of nodes dedicated to ML tasks
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    nwc_nodes = total_nodes - ml_workers
    assert nwc_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = nwc_nodes // nodes_per_nwchem

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                provider=LocalProvider(
                    nodes_per_block=ml_workers,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
    ''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy=None,
    )
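
For illustration, the node arithmetic with assumed values: on an 8-node allocation (COBALT_JOBSIZE=8) with 2 ML nodes and the default 2 nodes per NWChem task, the QC pool is 8 - 2 = 6 nodes, giving 6 // 2 = 3 NWChem workers; an indivisible split (e.g. ml_workers=3) would trip the assertion.

# Hypothetical call inside an 8-node Cobalt job (COBALT_JOBSIZE=8):
# 8 total nodes - 2 ML nodes = 6 NWChem nodes -> 6 // 2 = 3 QC workers.
config = theta_nwchem_config(ml_workers=2, log_dir='parsl_logs', nodes_per_nwchem=2)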
Example #11
def theta_persistent(log_dir: str,
                     nodes_per_nwchem: int = 1,
                     qc_nodes: int = 8,
                     ml_nodes: int = 8,
                     ml_prefetch: int = 0) -> Config:
    """Configuration where the application is persistent and sits on the Theta login node.

    Nodes will be requested from Cobalt using separate jobs for ML and QC tasks.

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        qc_nodes: Number of nodes dedicated to QC tasks
        ml_nodes: Number of nodes dedicated to ML tasks
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    return Config(
        retries=8,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=qc_nodes // nodes_per_nwchem,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-cache-quad' if qc_nodes <= 8 else None,
                    walltime='00:60:00',
                    nodes_per_block=qc_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    cmd_timeout=360,
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env


export OMP_NUM_THREADS=64
export KMP_INIT_AT_FORK=FALSE
export PYTHONPATH=$PYTHONPATH:$(pwd)

export PATH="/lus/theta-fs0/projects/CSC249ADCD08/software/nwchem-6.8.1/bin/LINUX64:$PATH"
mkdir -p scratch  # For the NWChem tasks
pwd
which nwchem
hostname
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64_lin/:/opt/intel/compilers_and_libraries_2020.0.166/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-flat-quad',
                    nodes_per_block=ml_nodes,
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    walltime='00:60:00',
                    init_blocks=0,
                    max_blocks=1,
                    cmd_timeout=360,
                    launcher=AprunLauncher(
                        overrides='-d 256 --cc depth -j 4'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
                ),
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
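
Since both providers use init_blocks=0, loading this configuration submits no Cobalt jobs up front; blocks are only requested once tasks are sent to the executors. A short sketch with illustrative arguments (note that qc_nodes > 8 switches the QC provider to the default queue):

import parsl

config = theta_persistent(log_dir='parsl_logs', qc_nodes=16, ml_prefetch=2)
parsl.load(config)  # no allocation is requested until tasks arrive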
Example #12
    ],
    strategy=None
)

# Configuration to run on Theta using single-app applications
theta_config = Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="htex",
                max_workers=8,
                provider=LocalProvider(
                    nodes_per_block=int(os.environ.get("COBALT_JOBSIZE", 1)),  # cast: environment values are strings
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),
                    worker_init='''
module load miniconda-3
export PATH=~/software/psi4/bin:$PATH
conda activate /lus/theta-fs0/projects/CSC249ADCD08/colmena/env
export KMP_INIT_AT_FORK=FALSE
export OMP_NUM_THREADS=8
''',
                ),
            ),
            ThreadPoolExecutor(label="local_threads", max_threads=4)
        ],
        strategy=None,
    )

theta_nwchem_config = Config(
Example #13
aprun_overrides = """-cc depth -j 1 -d 64"""

theta_executor = HighThroughputExecutor(
    label='worker-nodes',
    address=address_by_hostname(),
    worker_debug=True,
    suppress_failure=False,
    poll_period=5000,
    cores_per_worker=256,
    heartbeat_period=300,
    heartbeat_threshold=1200,
    provider=LocalProvider(nodes_per_block=8,
                           init_blocks=1,
                           min_blocks=1,
                           max_blocks=1,
                           launcher=AprunLauncher(overrides=aprun_overrides),
                           walltime=WALLTIME),
)

cori_in_salloc_executor = HighThroughputExecutor(
    label='worker-nodes',
    address=address_by_hostname(),
    worker_debug=True,
    suppress_failure=True,
    poll_period=5000,
    cores_per_worker=272,
    heartbeat_period=300,
    heartbeat_threshold=1200,
    provider=LocalProvider(nodes_per_block=299,
                           init_blocks=1,
                           min_blocks=1,
Example #14
    strategy=None,
)

config = Config(
    retries=1,
    usage_tracking=True,
    executors=[
        HighThroughputExecutor(
            address=address_by_hostname(),
            label="htex",
            max_workers=4,
            prefetch_capacity=1,
            provider=CobaltProvider(
                queue='CVD_Research',
                account='CVD_Research',
                launcher=AprunLauncher(overrides="-d 256 --cc depth -j 4"),
                walltime='3:00:00',
                nodes_per_block=4,
                init_blocks=1,
                min_blocks=1,
                max_blocks=4,
                scheduler_options='#COBALT --attrs enable_ssh=1',
                worker_init=f'''
module load miniconda-3
module load java
source activate /home/lward/exalearn/covid/toxicity-prediction/opera/env
export KMP_AFFINITY=disabled
which python

# Set the environment variables
{envs}
Example #15
 HighThroughputExecutor(
     label='theta_local_htex_multinode',
     max_workers=32,  # The target process is itself a multiprocessing application; we do
     # not need to overload the compute node with additional Parsl workers.
     address="10.236.1.195",
     # address=address_by_hostname(),
     # launch_cmd=launch_cmd,
     prefetch_capacity=2,
     provider=CobaltProvider(
         #queue='debug-flat-quad',
         #queue='default',
         queue='CVD_Research',
         #account='candle_aesp',
         account='CVD_Research',
         launcher=AprunLauncher(overrides=" -d 64"),
         walltime='02:00:00',
         nodes_per_block=25,
         init_blocks=1,
         min_blocks=1,
         max_blocks=1,
         # string to prepend to #COBALT blocks in the submit
         # script to the scheduler eg: '#COBALT -t 50'
         # scheduler_options='',
         # Command to be run before starting a worker, such as:
         # 'module load Anaconda; source activate parsl_env'.
         # worker_init='source ~/Anaconda/bin/activate; conda activate candle_py3.7;',
         worker_init=
         'source ~/anaconda3/bin/activate; conda activate candle_py3.7;',
         #worker_init=("bash /projects/candle_aesp/yadu/unpack_and_load.sh tmp candle_py3.7 /projects/candle_aesp/yadu/candle_py3.7.tar.gz /dev/shm/ ;"
         #             "source /dev/shm/candle_py3.7/bin/activate ;"
Example #16
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.providers import TorqueProvider
from parsl.channels import SSHChannel
from parsl.launchers import AprunLauncher

# If you are a developer running tests, make sure to update parsl/tests/configs/user_opts.py
# If you are a user copying-and-pasting this as an example, make sure to either
#       1) create a local `user_opts.py`, or
#       2) delete the user_opts import below and replace all appearances of `user_opts` with the literal value
#          (i.e., user_opts['swan']['username'] -> 'your_username')
from .user_opts import user_opts

config = Config(
    executors=[
        HighThroughputExecutor(
            label='swan_htex',
            provider=TorqueProvider(
                channel=SSHChannel(
                    hostname='swan.cray.com',
                    username=user_opts['swan']['username'],
                    script_dir=user_opts['swan']['script_dir'],
                ),
                nodes_per_block=1,
                init_blocks=1,
                max_blocks=1,
                launcher=AprunLauncher(),
                scheduler_options=user_opts['swan']['scheduler_options'],
                worker_init=user_opts['swan']['worker_init'],
            ),
        )
    ]
)
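
For reference, a minimal sketch of the user_opts structure this config expects when defined inline instead of imported; every value below is a placeholder.

user_opts = {
    'swan': {
        'username': 'your_username',                         # placeholder
        'script_dir': '/home/your_username/parsl_scripts',   # placeholder
        'scheduler_options': '',                             # extra #PBS directives, if any
        'worker_init': '',                                   # e.g. environment setup commands
    }
}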
Example #17
from parsl.launchers import AprunLauncher
from parsl.providers import TorqueProvider
from parsl.channels import LocalChannel

from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.addresses import address_by_hostname

from parsl.data_provider.scheme import GlobusScheme

config = Config(executors=[
    HighThroughputExecutor(
        label="bluewaters_htex",
        worker_debug=True,
        address="<LOGIN_NODE>",
        provider=TorqueProvider(
            channel=LocalChannel(),
            init_blocks=1,
            max_blocks=1,
            min_blocks=1,
            nodes_per_block=1,
            launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),
            scheduler_options='''#PBS -l nodes=1:ppn=32
#PBS -q debug''',
            worker_init='''module load bwpy''',
            walltime='00:30:00'),
        storage_access=[
            GlobusScheme(endpoint_uuid="d59900ef-6d04-11e5-ba46-22000b92c6ec",
                         endpoint_path="/",
                         local_path="/")
        ])
], )