Example #1
def workflow_config(name,
                    nodes,
                    cores_per_node=24,
                    interval=30,
                    monitor=False):
    import logging
    import parsl
    from parsl.config import Config
    from parsl.channels import LocalChannel
    from parsl.launchers import SrunLauncher
    from parsl.providers import LocalProvider
    from parsl.addresses import address_by_interface
    from parsl.executors import HighThroughputExecutor
    from parsl.monitoring.monitoring import MonitoringHub

    parsl.set_stream_logger()
    parsl.set_file_logger('script.output', level=logging.DEBUG)

    logging.info('Configuring Parsl Workflow Infrastructure')

    # Read the per-worker environment setup (e.g., dataset locations) from parsl.env
    with open('parsl.env', 'r') as reader:
        env_str = reader.read()

    logging.info(f'Task Environment {env_str}')

    mon_hub = MonitoringHub(
        workflow_name=name,
        hub_address=address_by_interface('ib0'),
        hub_port=60001,
        resource_monitoring_enabled=True,
        monitoring_debug=False,
        resource_monitoring_interval=interval,
    ) if monitor else None

    config = Config(
        executors=[
            HighThroughputExecutor(
                label=name,
                # Optional: The network interface on node 0 which compute nodes can communicate with.
                # address=address_by_interface('enp4s0f0' or 'ib0')
                address=address_by_interface('ib0'),
                # one worker per core on each node
                max_workers=cores_per_node,
                provider=LocalProvider(
                    channel=LocalChannel(script_dir='.'),
                    # make sure the nodes_per_block matches the nodes requested in the submit script in the next step
                    nodes_per_block=nodes,
                    # give each srun task all of the node's requested cores
                    launcher=SrunLauncher(overrides=f'-c {cores_per_node}'),
                    cmd_timeout=120,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init=env_str,
                ),
            )
        ],
        monitoring=mon_hub,
        strategy=None,
    )

    logging.info('Loading Parsl Config')

    parsl.load(config)
    return
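A hypothetical invocation (the node and core counts below are placeholders, not values from the original):

# Start Parsl across 2 Slurm-allocated nodes with 24 workers per node,
# and turn the monitoring hub on.
workflow_config('my_workflow', nodes=2, cores_per_node=24, monitor=True)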
Example #2
def _parsl_initialize(config=None):
    dfk = parsl.load(config)
    return dfk
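parsl.load returns the DataFlowKernel it constructs, so callers can shut it down explicitly; a minimal pairing sketch (my_config is a placeholder):

dfk = _parsl_initialize(my_config)
try:
    pass  # submit apps here
finally:
    dfk.cleanup()  # shut the DataFlowKernel down
    parsl.clear()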
Example #3
        help="Length of array where each array is s x s",
    )
    parser.add_argument(
        "--proxy",
        action="store_true",
        help="Use proxy store to pass inputs",
    )
    parser.add_argument(
        "--redis-port",
        type=int,
        default=None,
        help="If not None, use Redis backend",
    )
    args = parser.parse_args()

    parsl.load()

    if args.proxy:
        if args.redis_port is None:
            store = ps.store.init_store("local")
        else:
            store = ps.store.init_store(
                "redis",
                hostname="127.0.0.1",
                port=args.redis_port,
            )

    mapped_results = []
    for _ in range(args.num_arrays):
        x = np.random.rand(args.size, args.size)
        if args.proxy:
Example #4
parser.add_argument("--config",
                    default=None,
                    help="Parsl config to parallelize with")
args = parser.parse_args()

base_dir = '/'.join(os.path.abspath(__file__).split('/')[:-2])
if args.out_dir is None:
    args.out_dir = os.path.join(base_dir, 'data', 'processed')
if args.config is None:
    args.config = os.path.join(base_dir, 'fusionsimulation', 'configs',
                               'local.py')

spec = importlib.util.spec_from_file_location('', args.config)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
parsl.load(module.config)

if args.container_type == 'singularity':
    image_path = '{base_dir}/docker/fusion-simulation.sif'.format(
        base_dir=base_dir)
    # FIXME may require too much memory on some machines
    if not os.path.isfile(image_path):
        print('downloading {}'.format(image_path))
        subprocess.call(
            'singularity build {image_path} docker://olopadelab/fusion-simulation'
            .format(image_path=image_path),
            shell=True)

gene_id_to_gene_name_map_path = parse_gene_id_to_gene_name_map(
    args.gencode_annotation)
Example #5
        print(prev.filepath)

    for key in futs:
        if key > 0:
            fu = futs[key]
            with open(fu.result().filepath, 'r') as f:
                data = f.read().strip()
            assert data == str(
                key), "[TEST] incr failed for key: {0} got: {1}".format(
                    key, data)

    cleanup_work(depth)


if __name__ == '__main__':
    parsl.clear()
    dfk = parsl.load(config)

    parser = argparse.ArgumentParser()
    parser.add_argument("-w",
                        "--width",
                        default="5",
                        help="width of the pipeline")
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="Count of apps to launch")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()
Example #6
from parsl.channels import SSHChannel
from parsl.providers.local.local import LocalProvider
import os

remote_config = Config(executors=[
    IPyParallelExecutor(
        label='remote_ipp',
        provider=LocalProvider(
            min_blocks=1,
            init_blocks=1,
            max_blocks=4,
            nodes_per_block=1,
            parallelism=0.5,
            channel=SSHChannel(hostname="localhost"),
            #worker_init='source /phys/groups/tev/scratch3/users/gwatts/anaconda3/etc/profile.d/conda.sh && conda activate parsl',
            worker_init=('source /home/gwatts/anaconda3/etc/profile.d/conda.sh && '
                         'export PYTHONPATH=$PYTHONPATH:{} && '
                         'conda activate parsl_test').format(os.getcwd()),
            move_files=False,
        ))
])
parsl.load(remote_config)

# Run this and print out the result
if os.path.isfile("all_hellos.txt"):
    os.unlink("all_hellos.txt")
r = run_cat_test()
with open(r.outputs[0].result(), 'r') as f:
    print(f.read())

print("Result from the test is {}".format(r.result()))
Example #7
                    max_workers=nproc,
                    provider=SlurmProvider(
                        channel=LocalChannel(script_dir='parsl_slurm'),
                        launcher=SrunLauncher(),
                        max_blocks=args.ncpu + 5,
                        init_blocks=args.ncpu,
                        partition='all',
                        scheduler_options=sched_opts,   # Enter scheduler_options if needed
                        worker_init=wrk_init,         # Enter worker_init if needed
                        walltime='00:120:00'
                    ),
                )
            ],
            retries=20,
        )
        dfk = parsl.load(slurm_htex)
    else:
        config = Config(executors=[ThreadPoolExecutor(max_threads=args.ncpu)])
        parsl.load(config)

    # Write futures
    out_dict = {} # Output filename list
    run_futures = {} # Future list
    for key in sorted(sample_dict.keys()):
        run_futures[key] = []
        # Make size batches
        batches = []
        batch_list = []
        batch_size, tot_size = 0, 0
        for i, fname in enumerate(sample_dict[key]):
            batch_size += os.stat(fname).st_size
Example #8
def main():

    local_threads = Config(
        executors=[ThreadPoolExecutor(max_threads=8, label='local_threads')])

    local_htex = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                worker_debug=True,
                cores_per_worker=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    worker_init=(
                        'export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES\n'),
                ),
            )
        ],
        strategy=None,
    )

    parsl.clear()
    #parsl.load(local_threads)
    parsl.load(local_htex)

    @bash_app
    def generate(outputs=[]):
        return "echo $(( RANDOM )) &> {}".format(outputs[0])

    @bash_app
    def concat(inputs=[], outputs=[]):
        return "cat {0} > {1}".format(" ".join(i.filepath for i in inputs),
                                      outputs[0])

    @python_app
    def total(inputs=[]):
        total = 0
        with open(inputs[0], 'r') as f:
            for l in f:
                total += int(l)
        return total

    # Create 5 files with semi-random numbers
    print('Getting started')
    output_files = []
    for i in range(5):
        output_files.append(
            generate(
                outputs=[File(os.path.join(os.getcwd(), 'random-%s.txt' %
                                           i))]))

    # Concatenate the files into a single file
    print('before concat')
    cc = concat(inputs=[i.outputs[0] for i in output_files],
                outputs=[File(os.path.join(os.getcwd(), 'combined.txt'))])

    # Calculate the sum of the random numbers
    total_future = total(inputs=[cc.outputs[0]])

    print(total_future.result())
Example #9
    max_rtt = max(rtt) * 1000
    avg_rtt = average(rtt) * 1000
    print("App1_RTT   |   Min:{0:0.3}ms Max:{1:0.3}ms Average:{2:0.3}ms".format(min_rtt,
                                                                                max_rtt,
                                                                                avg_rtt))

    rtt = app2_rtts
    min_rtt = min(rtt) * 1000
    max_rtt = max(rtt) * 1000
    avg_rtt = average(rtt) * 1000
    print("App2_RTT   |   Min:{0:0.3}ms Max:{1:0.3}ms Average:{2:0.3}ms".format(min_rtt,
                                                                                max_rtt,
                                                                                avg_rtt))


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--count", default="10",
                        help="Count of apps to launch")
    parser.add_argument("-d", "--debug", action='store_true',
                        help="Count of apps to launch")
    args = parser.parse_args()

    # if args.debug:
    #    parsl.set_stream_logger()

    parsl.load(local_config)

    x = test_simple(int(args.count))
Example #10
                # HighThroughputExecutor(
                #     label="merges",
                #     worker_debug=True,
                #     cores_per_worker=1,
                #     provider=LocalProvider(
                #         channel=LocalChannel(),
                #         init_blocks=1,
                #         max_blocks=10,
                #     ),
                # ),
            ],
            monitoring=monitoring,
            retries=2,
            retry_handler=retry_handler,
        )
        dfk = parsl.load(slurm_htex)

        if args.chunkify:
            print("XXX")
            # os.mkdir("temp")
            for key, fnames in sample_dict.items():
                output, metrics = processor.run_uproot_job(
                    {key: fnames},
                    treename='Events',
                    processor_instance=processor_object,
                    executor=processor.parsl_executor,
                    executor_args={
                        'skipbadfiles': args.skip,
                        'savemetrics': True,
                        'schema': processor.NanoAODSchema,
                        # 'mmap':True,
Example #11
    def _register_workflow(workflow_graph, parsl_config):
        """
        For right now we will keep track of all unique combinations of resource requirements and
        how many of each. The maxBlocks can then be set to the number of each resource requirement. In the
        future we can try to be smarter by examining the workflow graph and deciding how many could
        possibly ever be running concurrently.

        Pipeline is single, Config is single
            * Assign all Apps to null executor
            * @App('python', dfk)  <-- No executor=

        Pipeline is single, Config is multi
            * Assign all Apps to null executor (will be randomly assigned among configured executors)
            * Log warning of mismatch

        Pipeline is multi, Config is single
            * Assign all Apps to null executor
            * Log warning of mismatch

        Pipeline is multi, Config is multi, perfect match
            * Assign all Apps to their appropriate executor

        Pipeline is multi, Config is multi, some match
            * Assign apps that can to their appropriate executors
            * For the remaining, assign to first executor
            * Log warning of mismatch

        Pipeline is multi, Config is multi, no matches
            * Assign all apps to first executor
            * Log warning of mismatch

        :param workflow_graph:
        :param dfk:
        :return:
        """
        # Register config with Parsl
        parsl.load(parsl_config)

        is_single_parsl_config = len(parsl_config.executors) <= 1

        # Check to see if Pipeline is single or multi
        pipeline_executors = set(Meta._executors.keys())  # Start with anything defined in Meta
        is_single_pipeline_meta = len(pipeline_executors) <= 1

        logger.debug('Pipeline is {}-{}'.format(
            'single' if is_single_pipeline_meta else 'multi',
            'single' if is_single_parsl_config else 'multi',
        ))

        app_factories = dict()
        # At a minimum define the 'all' executor, which is an executor with no specific label
        app_factories['all'] = ParslPipeline._generate_executor_app_factories()

        # If we have multiple executors, define them
        if not any((is_single_parsl_config, is_single_pipeline_meta)):
            for executor in parsl_config.executors:
                app_factories[executor.label] = ParslPipeline._generate_executor_app_factories(executor_name=executor.label)

        # Some data containers
        app_futures, data_futures = list(), dict()
        app_nodes_registered = {
            node_id: False
            for node_id in workflow_graph
            if workflow_graph.nodes[node_id]['type'] == 'app'
        }
        data_node_in_degree = {
            node_id: in_degree
            for node_id, in_degree in workflow_graph.in_degree(workflow_graph.nodes())
            if workflow_graph.nodes[node_id]['type'] == 'data'
        }

        def register_app(app_node_id, workflow_graph):
            """
            Recursive algorithm to traverse the workflow graph and register apps
            :param app_node_id: str ID of the app node to try to register with parsl
            :param workflow_graph: nx.DiGraph Directed graph representation of the workflow
            :return: list<AppFuture> All app futures generated by the workflow
            """
            # Check if any input data nodes don't have data futures and have in-degree > 0
            for input_dependency_node in workflow_graph.predecessors(app_node_id):
                if workflow_graph.nodes[input_dependency_node]['type'] == 'data':
                    if input_dependency_node not in data_futures and data_node_in_degree[input_dependency_node] > 0:
                        register_app(list(workflow_graph.predecessors(input_dependency_node))[0], workflow_graph)
                elif workflow_graph.nodes[input_dependency_node]['type'] == 'app':
                    if not app_nodes_registered[input_dependency_node]:
                        register_app(input_dependency_node, workflow_graph)

            # Register this app
            _app_blueprint = workflow_graph.nodes[app_node_id]['blueprint']
            _app_inputs = [
                data_futures.get(input_data)
                for input_data in _app_blueprint['inputs']
                if data_futures.get(input_data)
            ]

            # If there are any app dependencies, add them
            if _app_blueprint['wait_on']:
                _app_inputs.extend([
                    app_nodes_registered.get(wait_on_app_id)
                    for wait_on_app_id in _app_blueprint['wait_on']
                    if app_nodes_registered.get(wait_on_app_id)
                ])

            # Select executor to run this app on
            executor_assignment = 'all'
            if not any((is_single_parsl_config, is_single_pipeline_meta)):
                # This is a multi-multi run, we might be able to assign to a executor
                # Giving a name defined in Meta takes precedence
                meta_executor = _app_blueprint.get('meta', {}).get('executor')

                # For backward compatibility with 'site' key
                if meta_executor is None and 'site' in _app_blueprint.get('meta', {}):
                    meta_executor = _app_blueprint.get('meta', {}).get('site')

                if meta_executor is not None and meta_executor in app_factories:
                    executor_assignment = meta_executor
                elif Meta._default_executor is not None and Meta._default_executor in app_factories:
                    executor_assignment = Meta._default_executor

            # Create the App future with a specific executor App factory
            if _app_blueprint['type'] == 'bash':
                _app_future = app_factories[executor_assignment][BASH_APP](
                    cmd=_app_blueprint['cmd'],
                    success_on=_app_blueprint['success_on'],
                    inputs=_app_inputs,
                    outputs=_app_blueprint['outputs'],
                    stdout=_app_blueprint['stdout'],
                    stderr=_app_blueprint['stderr']
                )
            else:
                _app_future = app_factories[executor_assignment][PYTHON_APP](
                    func_=_app_blueprint['func'],
                    func_args=_app_blueprint['args'],
                    func_kwargs=_app_blueprint['kwargs'],
                    inputs=_app_inputs,
                    outputs=_app_blueprint['outputs'],
                    stdout=_app_blueprint['stdout'],
                    stderr=_app_blueprint['stderr']
                )

            logger.info('{} assigned to executor {}, task id {}'.format(_app_blueprint['id'], executor_assignment, _app_future.tid))
            app_futures.append((_app_blueprint['id'], _app_future))

            # Set output data futures
            for data_fut in _app_future.outputs:
                if data_fut.filename not in data_futures:
                    data_futures[data_fut.filename] = data_fut

            app_nodes_registered[app_node_id] = _app_future

        # Register all apps
        for app_node in app_nodes_registered:
            if not app_nodes_registered[app_node]:
                register_app(app_node, workflow_graph)

        # Gather files marked as temporary, if any
        tmp_files = [d for d in data_futures if Data(d).tmp]

        return app_futures, tmp_files
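The single/multi matrix in the docstring reduces to a small decision rule; a standalone sketch of that logic (the names here are illustrative, not taken from the pipeline code):

def choose_executor(app_executor, pipeline_executors, config_executor_labels):
    """Pick an executor label per the single/multi policy described above."""
    single_pipeline = len(set(pipeline_executors)) <= 1
    single_config = len(config_executor_labels) <= 1
    if single_pipeline or single_config:
        return 'all'  # null executor: Parsl assigns among whatever is configured
    if app_executor in config_executor_labels:
        return app_executor  # label matches a configured executor
    return config_executor_labels[0]  # mismatch: fall back to the first executor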
Example #12
def local_setup():
    config = fresh_config()
    config.executors[0].poll_period = 1
    config.executors[0].max_workers = 1
    parsl.load(config)
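In parsl's test suite a setup like this is typically paired with a teardown that shuts the DataFlowKernel down; a minimal sketch:

def local_teardown():
    parsl.dfk().cleanup()
    parsl.clear()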
Example #13
def test():
    import parsl
    from pyscf import lib, gto, scf
    import numpy as np
    import pandas as pd
    import logging

    from parsl.config import Config
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.launchers import SimpleLauncher
    from parsl.executors import ExtremeScaleExecutor
    ncore = 4
    config = Config(
        executors=[
            ExtremeScaleExecutor(label="Extreme_Local",
                                 worker_debug=True,
                                 ranks_per_node=ncore,
                                 provider=LocalProvider(
                                     channel=LocalChannel(),
                                     init_blocks=1,
                                     max_blocks=1,
                                     launcher=SimpleLauncher()))
        ],
        strategy=None,
    )

    parsl.load(config)

    mol = gto.M(atom='H 0. 0. 0.; H 0. 0. 2.0',
                unit='bohr',
                ecp='bfd',
                basis='bfd_vtz')
    mf = scf.RHF(mol).run()
    mol.output = None
    mol.stdout = None
    mf.output = None
    mf.stdout = None
    mf.chkfile = None
    from pyqmc import ExpCuspFunction, GaussianFunction, MultiplyWF, PySCFSlaterRHF, JastrowSpin, initial_guess, EnergyAccumulator
    from pyqmc.accumulators import PGradTransform, LinearTransform

    nconf = 1600
    basis = [
        ExpCuspFunction(2.0, 1.5),
        GaussianFunction(0.5),
        GaussianFunction(2.0),
        GaussianFunction(.25),
        GaussianFunction(1.0),
        GaussianFunction(4.0),
        GaussianFunction(8.0)
    ]
    wf = MultiplyWF(PySCFSlaterRHF(mol, mf), JastrowSpin(mol, basis, basis))
    coords = initial_guess(mol, nconf)
    energy_acc = EnergyAccumulator(mol)
    pgrad_acc = PGradTransform(
        energy_acc, LinearTransform(wf.parameters, ['wf2acoeff', 'wf2bcoeff']))

    from pyqmc.optsr import gradient_descent

    # NOTE: distvmc (the distributed VMC driver used below) is not defined in
    # this snippet; the original module presumably imports it elsewhere.
    gradient_descent(wf,
                     coords,
                     pgrad_acc,
                     vmc=distvmc,
                     vmcoptions={
                         'npartitions': ncore,
                         'nsteps': 100,
                         'nsteps_per': 100
                     })
Example #14
def parsl_executor(items, function, accumulator, **kwargs):
    """Execute using parsl pyapp wrapper

    Parameters
    ----------
        items : list
            List of input arguments
        function : callable
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        config : parsl.config.Config, optional
            A parsl DataFlow configuration object. Necessary if there is no active kernel

            .. note:: In general, it is safer to construct the DFK with ``parsl.load(config)`` prior to calling this function
        status : bool
            If true (default), enable progress bar
        unit : str
            Label of progress bar unit
        desc : str
            Label of progress bar description
        compression : int, optional
            Compress accumulator outputs in flight with LZ4, at level specified (default 1)
            Set to ``None`` for no compression.
        tailtimeout : int, optional
            Timeout requirement on job tails. Cancel all remaining jobs if none have finished
            in the timeout window.
    """
    if len(items) == 0:
        return accumulator
    import parsl
    from parsl.app.app import python_app
    from .parsl.timeout import timeout
    status = kwargs.pop('status', True)
    unit = kwargs.pop('unit', 'items')
    desc = kwargs.pop('desc', 'Processing')
    clevel = kwargs.pop('compression', 1)
    tailtimeout = kwargs.pop('tailtimeout', None)
    if clevel is not None:
        function = _compression_wrapper(clevel, function)
    add_fn = _iadd

    cleanup = False
    config = kwargs.pop('config', None)
    try:
        parsl.dfk()
    except RuntimeError:
        cleanup = True
    if cleanup and config is None:
        raise RuntimeError(
            "No active parsl DataFlowKernel, must specify a config to construct one"
        )
    elif not cleanup and config is not None:
        raise RuntimeError("An active parsl DataFlowKernel already exists")
    elif config is not None:
        parsl.clear()
        parsl.load(config)

    app = timeout(python_app(function))

    futures = set(app(item) for item in items)
    _futures_handler(futures, accumulator, status, unit, desc, add_fn,
                     tailtimeout)

    if cleanup:
        parsl.dfk().cleanup()
        parsl.clear()

    return accumulator
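A hedged usage sketch (assuming the function is exposed as in coffea's processor module; the config, items, and accumulator below are placeholders):

# Process a list of work items into an accumulator via Parsl apps.
out = parsl_executor(
    work_items,                 # list of inputs
    process_one,                # callable returning an accumulator instance
    value_accumulator,          # stand-in for a coffea AccumulatorABC
    config=my_parsl_config,     # only needed when no DFK is already loaded
    compression=None,           # skip LZ4 compression of results in flight
    status=True,                # show the progress bar
)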
Example #15
from functools import partial

logger = logging.getLogger("parsl.appworkflow")

parsl.set_stream_logger()
parsl.set_stream_logger(__name__)

logger.info("No-op log message to test log configuration")

# import configuration after setting parsl logging, because interesting
# construction happens during the configuration

import configuration

parsl.load(configuration.parsl_config)


# Given a command line, wrap it so that we invoke shifter appropriately
# (so that we don't need to hardcode shifter / singularity command lines
# all over the place).
def shifter_wrapper(img, cmd):
    wrapped_cmd = "shifter --entrypoint --image={} {}".format(img, cmd)
    return wrapped_cmd


def singularity_wrapper(img, inst_cat_root, work_and_out_path, cmd):
    wrapped_cmd = "singularity exec -B {},{},/projects/LSSTsky {} /projects/LSSTsky/Run3.0i/DESC_DC2_imSim_Workflow/docker_run.sh {}".format(
        inst_cat_root, work_and_out_path, img, cmd)
    return wrapped_cmd
Example #16
lassen_config = Config(
    executors=[lassen_executor],
    strategy=None,
)

distributed_remote_config = Config(
    executors=[local_executor, lassen_executor],
    strategy=None,
)

print(parsl.__version__)
parsl.set_stream_logger()

parsl.clear()
#parsl.load(local_config)
parsl.load(distributed_remote_config)

#parsl.load(m1_htex)
#parsl.load(lassen_config)


# build a string that loads conda correctly
def load_conda():
    return ('CONDA_BASE=$(conda info --base)\n'
            'source ${CONDA_BASE}/etc/profile.d/conda.sh\n'
            'conda deactivate\n'
            'conda activate mirgeDriver.flame1d\n')


#@bash_app(executors=['local_threads'])
@bash_app(executors=['lassen_htex'])
Example #17
    def run(self, debug=False):
        """
        Run trials provided by the optimizer while saving results.
        """
        if debug:
            parsl.set_stream_logger()
        self._dfk = parsl.load(self.parsl_config)

        logger.info(f'Starting ParslRunner with config\n{self}')

        flag = True
        initialize_flag = True
        result = None
        for idx, parameter_configs in enumerate(self.optimizer):
            try:
                logger.info(f'Writing script with configs {parameter_configs}')
                command_script_path, command_script_content = self._writeScript(
                    self.command, parameter_configs, 'command')
                if self.experiment.setup_template_string is not None:
                    _, setup_script_content = self._writeScript(
                        self.experiment.setup_template_string,
                        parameter_configs, 'setup')
                else:
                    setup_script_content = None
                if self.experiment.finish_template_string is not None:
                    _, finish_script_content = self._writeScript(
                        self.experiment.finish_template_string,
                        parameter_configs, 'finish')
                else:
                    finish_script_content = None
                # set warm-up experiments
                if initialize_flag:
                    initialize_flag = False
                    logger.info(
                        f'Starting initializing trial with script at {command_script_path}'
                    )
                    runConfig = paropt.runner.RunConfig(
                        command_script_content=command_script_content,
                        experiment_dict=self.experiment.asdict(),
                        setup_script_content=setup_script_content,
                        finish_script_content=finish_script_content,
                    )
                    initializing_func_param = {}
                    for key, val in self.obj_func_params.items():
                        initializing_func_param[key] = val
                    initializing_func_param['timeout'] = 300
                    # result = self.obj_func(runConfig, **self.obj_func_params).result()
                    result = self.obj_func(runConfig,
                                           **initializing_func_param).result()

                logger.info(
                    f'Starting trial with script at {command_script_path}')
                runConfig = paropt.runner.RunConfig(
                    command_script_content=command_script_content,
                    experiment_dict=self.experiment.asdict(),
                    setup_script_content=setup_script_content,
                    finish_script_content=finish_script_content,
                )
                result = None
                result = self.obj_func(runConfig,
                                       **self.obj_func_params).result()
                self._validateResult(parameter_configs, result)
                trial = Trial(
                    outcome=result['obj_output'],
                    parameter_configs=parameter_configs,
                    run_number=self.run_number,
                    experiment_id=self.experiment.id,
                    obj_parameters=result['obj_parameters'],
                )
                self.storage.saveResult(self.session, trial)
                self.optimizer.register(trial)
                flag = flag and self.run_result['success']
                self.run_result['message'][
                    f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                        f'Successfully completed trials {idx} for experiment')

            except Exception as e:
                err_traceback = traceback.format_exc()
                if result is not None and result[
                        'stdout'] == 'Timeout':  # for timeCommandLimitTime in lib, timeout
                    trial = Trial(
                        outcome=result['obj_output'],
                        parameter_configs=parameter_configs,
                        run_number=self.run_number,
                        experiment_id=self.experiment.id,
                        obj_parameters=result['obj_parameters'],
                    )
                    self.optimizer.register(trial)
                    logger.exception('trial timed out')
                    self.storage.saveResult(self.session, trial)
                    self.run_result['success'] = False
                    self.run_result['message'][
                        f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                            f'Failed to complete trials {idx}:\nError: {e}\n{err_traceback}'
                        )

                else:
                    trial = Trial(
                        outcome=10000000,
                        parameter_configs=parameter_configs,
                        run_number=self.run_number,
                        experiment_id=self.experiment.id,
                        obj_parameters={},
                    )
                    self.storage.saveResult(self.session, trial)
                    self.run_result['success'] = False
                    self.run_result['message'][
                        f'experiment {self.experiment.id} run {self.run_number}, config is {parameter_configs}'] = (
                            f'Failed to complete trials {idx}:\nError: {e}\n{err_traceback}'
                        )

        logger.info(f'Finished; Run result: {self.run_result}')
Example #18
import parsl
from parsl.app.app import python_app, bash_app
from config.midway import midway_htex
from jinja2 import Template, Environment
from jinja2.loaders import FileSystemLoader
import os

parsl.set_stream_logger()

parsl.load(midway_htex)


# Helper function to create an input file from a template
def create_template(template_name, output_name, contents):
    fs_loader = FileSystemLoader('config_files/templates')
    env = Environment(loader=fs_loader)
    template = env.get_template(template_name)
    t_path = os.path.join("config_files", output_name)
    with open(t_path, 'w') as t_file:
        t_file.write(template.render(contents))
    return t_path
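A hypothetical call (the template and variable names are made up for illustration):

# Renders config_files/templates/packmol.inp.j2 into config_files/packmol.inp
inp_path = create_template('packmol.inp.j2', 'packmol.inp', {'tolerance': 2.0})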


#################
# App definitions
#################
@bash_app(executors=['midway_cpu'])
def packmol(input_file=None, inputs=[], outputs=[], stdout=None, stderr=None):
    return "/scratch/midway2/chard/clean/packmol-17.163/packmol < %s" % input_file
Example #19
#!/mnt/lustre_fs/users/kavotaw/apps/anaconda/anaconda3/envs/parsl_py36/bin/python3
import parsl, os
from config import mcullaghcluster
from library import *
from parsl.data_provider.files import File
parsl.set_stream_logger()
parsl.load(mcullaghcluster)

# RIGID DOCKING
ligands = open('list.dat', 'r')
for i in ligands:
    run = vina(inputs=[i], outputs=[rigid_docking_data])
run.result()

# RIGID ML
names, mols = getNamesMols(inputs=[ligands, rigid_docking_data])
names, features = getAllFeatures(inputs=[names, mols], outputs=[features_file])
train_and_test_svm_and_nn(
    inputs=[rigid_docking_data, features_file],
    outputs=[svm_model, r2_svm, nn_model, r2_nn, rigid_features_and_energies])

# SELECT TOP RESULTS
percent = '10'
select_top_percent(inputs=[percent, rigid_features_and_energies],
                   outputs=[top_ligands])

# FLEXIBLE DOCKING
for i in top_ligands:
    run = adfr(inputs=[i])
run.result()
Example #20
import parsl
from config import htex_config
from library import increment

parsl.load(htex_config)

for i in range(5):
    print('{} + 1 = {}'.format(i, increment(i).result()))
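The library module is not shown; a minimal sketch of the increment app it might define:

# library.py (hypothetical contents)
from parsl import python_app

@python_app
def increment(x):
    return x + 1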
Example #21
def main():

    args = parse_args(sys.argv[1:])

    # Copy V-pipe repo as main working directory
    tmp_dir = join(cwd, 'tmp')
    vpipe_dir = join(tmp_dir, 'vpipe')
    base_params = {
        'container': abspath(args.container),
        'work_dir': tmp_dir,
        'read_length': args.read_length,
        'variant_frequency': args.variant_frequency,
        'read_cutoff_bp': args.read_cutoff_bp,
        'primers_bp': args.primers_bp,
        'depth_cap': float(args.depth_cap),
        'stdout': abspath(join(args.outputs, 'mappgene.stdout')),
    }

    if shutil.which('singularity') is None:
        raise Exception(
            "Missing Singularity executable in PATH.\n\n"
            "Please ensure Singularity is installed: "
            "https://sylabs.io/guides/3.0/user-guide/installation.html"
        )

    if not exists(base_params['container']):
        raise Exception(
            f"Missing container image at {base_params['container']}\n\n"
            "Either specify another image with --container\n"
            f"Or build the container with the recipe at: {join(script_dir, 'data/container/recipe.def')}\n"
            "Or download the container with this command:\n\n"
            "$ singularity pull image.sif library://avilaherrera1/mappgene/image.sif:latest\n"
        )

    smart_remove(tmp_dir)
    smart_mkdir(tmp_dir)

    run(f'cp -rf /opt/vpipe {vpipe_dir}', base_params)
    smart_copy(join(script_dir, 'data/extra_files'), tmp_dir)

    run(f'cd {vpipe_dir} && sh init_project.sh || true', base_params)
    update_permissions(tmp_dir, base_params)

    if args.test:
        args.inputs = join(script_dir, 'data/example_inputs/*.fastq.gz')

    if isinstance(args.inputs, str):
        args.inputs = glob(args.inputs)

    all_params = {}

    # Copy reads to subject directory
    for input_read in args.inputs:

        pair1 = input_read.replace('_R2', '_R1')
        pair2 = input_read.replace('_R1', '_R2')
        if input_read != pair1 and pair2 not in args.inputs:
            raise Exception(f'Missing paired read: {pair2}')
        if input_read != pair2 and pair1 not in args.inputs:
            raise Exception(f'Missing paired read: {pair1}')

        subject = (basename(input_read)
                   .replace('.fastq.gz', '').replace('.fastq', '')
                   .replace('_R1', '').replace('_R2', '')
                   .replace('.', '_'))
        subject_dir = abspath(join(args.outputs, subject))

        if subject not in all_params:
            smart_copy(tmp_dir, subject_dir)
            params = base_params.copy()
            params['work_dir'] = subject_dir
            params['input_reads'] = [input_read]
            params['stdout'] = join(subject_dir, 'worker.stdout')
            all_params[subject] = params
        else:
            all_params[subject]['input_reads'].append(input_read)

    if args.slurm:
        executor = parsl.executors.HighThroughputExecutor(
            label="worker",
            address=parsl.addresses.address_by_hostname(),
            provider=parsl.providers.SlurmProvider(
                args.partition,
                launcher=parsl.launchers.SrunLauncher(),
                nodes_per_block=int(args.nnodes),
                init_blocks=1,
                max_blocks=1,
                worker_init=f"export PYTHONPATH=$PYTHONPATH:{os.getcwd()}",
                walltime=args.walltime,
                scheduler_options="#SBATCH --exclusive\n#SBATCH -A {}\n".
                format(args.bank),
                move_files=False,
            ),
        )
    elif args.flux:
        executor = parsl.executors.FluxExecutor(
            label="worker",
            flux_path=
            "/usr/global/tools/flux/toss_3_x86_64_ib/flux-c0.28.0.pre-s0.17.0.pre/bin/flux",
            provider=parsl.providers.SlurmProvider(
                args.partition,
                launcher=parsl.launchers.SrunLauncher(),
                nodes_per_block=int(args.nnodes),
                init_blocks=1,
                max_blocks=1,
                worker_init=f"export PYTHONPATH=$PYTHONPATH:{os.getcwd()}",
                walltime=args.walltime,
                scheduler_options="#SBATCH --exclusive\n#SBATCH -A {}\n".
                format(args.bank),
                move_files=False,
            ),
        )
    else:
        executor = parsl.executors.ThreadPoolExecutor(label="worker")

    config = parsl.config.Config(executors=[executor])
    parsl.set_stream_logger()
    parsl.load(config)

    if args.ivar:
        results = []
        for params in all_params.values():
            results.append(run_ivar(params))
        for r in results:
            r.result()

    elif args.vpipe:
        results = []
        for params in all_params.values():
            results.append(run_vpipe(params))
        for r in results:
            r.result()
Example #22
def local_setup():
    global dfk
    dfk = parsl.load(config)
Example #23
            max_blocks=30,
            min_blocks=0,
            overrides='module load apps/openmpi/gnu/3.0.0',
            queue='batch',
            channel=SSHChannel(hostname="va-murphy-login.kdi.local",
                               username=os.getenv("USER").split('@')[0],
                               script_dir=os.getenv("HOME") +
                               "/code-va/parsl-workflows/ssh_scripts"),
            launcher=MpiExecLauncher(),
            walltime='4000:00:00'))
],
                        retries=0)

#local_config = Config(executors=[IPyParallelExecutor(label="local_ipp",provider=LocalProvider(channel=LocalChannel(),init_blocks=23,max_blocks=23))])

dfk = parsl.load(minimap_config)

#parsl.load(local_config)
#workers = ThreadPoolExecutor(max_workers=4)
#dfk = DataFlowKernel(executors=[workers])
#dfk = DataFlowKernel(config = config)

#workers = IPyParallelExecutor()
#dfk = DataFlowKernel(workers)


# submitting plink tool
# plink2 --threads 36 --chr $chr --bfile chr$chr --export vcf id-paste=iid bgz --out chr$chr.plink
@App('bash', executors=['PLINKandEagle'], cache=True)
def plink2(b_inputs=[], chrom=[], outputs=[], stdout=None, stderr=None):
    out_prefix = outputs[0].replace(".vcf.gz", "")
Example #24
                scheduler_options='',
                # Command to be run before starting a worker, such as:
                worker_init='module load miniconda-3; export PATH=$PATH:{}'.format(MY_USER_PATH),
                cmd_timeout=120,
            ),
        ),
        ThreadPoolExecutor(label='login-node', max_threads=8),
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        monitoring_debug=False,
        resource_monitoring_interval=10,
    ))
parsl.load(parsl_config)


@python_app(executors=['theta-htex'])
def pi(num_points):
    from random import random

    inside = 0
    for i in range(num_points):
        x, y = random(), random()  # Drop a random point in the box.
        if x**2 + y**2 < 1:  # Count points within the circle.
            inside += 1

    return (inside * 4 / num_points)
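A minimal invocation sketch (the point count is arbitrary):

# Launch three estimates in parallel on the theta-htex executor and average them.
futures = [pi(10**6) for _ in range(3)]
print(sum(f.result() for f in futures) / 3)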

Example #25
def setup_module(module):
    parsl.load(config)
Example #26
import parsl
import pytest
from parsl import App
from parsl.tests.configs.local_threads import config
config['globals']['lazyErrors'] = True
parsl.clear()
parsl.load(config)


@App('python')
def divide(a, b):
    return a / b


@pytest.mark.local
def test_lazy_behavior():
    """Testing non lazy errors to work"""

    items = []
    for i in range(0, 1):
        items.append(divide(10, i))

    while True:
        if items[0].done():
            break

    return


if __name__ == "__main__":
Example #27
def cli_run():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--redishost",
        default="127.0.0.1",
        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport",
                        default="6379",
                        help="Port on which redis is available")
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="Count of apps to launch")
    parser.add_argument("-m",
                        "--mac",
                        action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        config = Config(
            executors=[
                ThreadPoolExecutor(label="htex"),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    else:
        config = Config(
            executors=[
                HighThroughputExecutor(
                    label="htex",
                    # Max workers limits the concurrency exposed via mom node
                    max_workers=2,
                    provider=LocalProvider(
                        init_blocks=1,
                        max_blocks=1,
                    ),
                ),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    parsl.load(config)

    print(
        '''This program creates an "MPI Method Server" that listens on an input queue and writes to an output queue:

        input_queue --> mpi_method_server --> output_queue

To send it a request, add an entry to the input queue:
     run "pipeline-pump -p N" where N is an integer request
To access a value, remove it from the output queue:
     run "pipeline-pull" (blocking) or "pipeline-pull -t T" (T an integer) to time out after T seconds
     TODO: Timeout does not work yet!
''')

    # Get the queues for the method server
    method_queues = MethodServerQueues(args.redishost, port=args.redisport)

    # Start the method server
    mms = ParslMethodServer([target_fun],
                            method_queues,
                            default_executors=['htex'])
    mms.run()
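For illustration only (this is not the actual pipeline-pump/pipeline-pull implementation, and the queue names are hypothetical), a plain-redis sketch of pumping a request onto an input list and blocking on an output list:

import redis

r = redis.Redis(host='127.0.0.1', port=6379)
r.rpush('inputs', 4)  # roughly what "pipeline-pump -p 4" would do
result = r.blpop('outputs', timeout=10)  # roughly "pipeline-pull -t 10"
if result is not None:
    _, value = result
    print(value)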