Example no. 1
import os

from radical.entk import Task

# Note: match_options() and mvar_replace_dict() are helper functions
# defined elsewhere in the source repository (see the sketch after this
# example).

def GenerateTask(tcfg, ecfg, pipe_name, stage_name, task_name):

    # Initialize a task object
    t = Task()

    # Dictionary of "magic" variables that mvar_replace_dict() substitutes
    # into staged file names
    mvar_dict = {"PIPELINE_ID": pipe_name}

    # Give this task object a name
    t.name = task_name

    # pre_exec lets you load modules and set up the environment
    # before the workload executes
    if tcfg['pre_exec'] != "":
        t.pre_exec = [tcfg['pre_exec']]

    # Executable to use for the task
    t.executable = tcfg['executable']

    # If this task takes a user-defined input file (likely for genmod
    # modules), stage it in from the experiment's input directory
    if "input_data_file" in tcfg['options']:
        tcfg['upload_input_data'].append(
            os.path.join(ecfg['exp_dir'], "input", ecfg['input_data_file']))

    # List of arguments for the executable
    t.arguments = [tcfg['script']] + match_options(tcfg['options'],
                                                   ecfg['options'])

    # CPU requirements for this task
    t.cpu_threads = {
        'processes': tcfg['cpu']['processes'],
        'process-type': tcfg['cpu']['process-type'],
        'threads-per-process': tcfg['cpu']['threads-per-process'],
        'thread-type': tcfg['cpu']['thread-type'],
    }

    # Upload data from your local machine to the remote machine
    # Note: Remote machine can be the local machine
    t.upload_input_data = tcfg['upload_input_data']

    # Copy data produced by other stages/tasks into this task's sandbox
    copy_list = []
    if "copy_input_data" in tcfg:
        for copy_stage, stage_map in tcfg['copy_input_data'].items():
            for copy_task, file_list in stage_map.items():
                # Reference the sandbox of the task that produced the data
                loc = "$Pipeline_{0}_Stage_{1}_Task_{2}".format(
                    pipe_name, copy_stage, copy_task)
                copy_list.extend(
                    '{0}/{1}'.format(loc, mvar_replace_dict(mvar_dict, x))
                    for x in file_list)

    # Attach the copy list (if any) to the task
    t.copy_input_data = copy_list

    # Build the list of outputs to download into the experiment's output
    # directory ("src > dst" stages remote src to local dst)
    download_list = []
    outdir = os.path.join(ecfg['exp_dir'], "output")
    if "download_output_data" in tcfg:
        download_list.extend(
            '{0} > {1}/{0}'.format(mvar_replace_dict(mvar_dict, x), outdir)
            for x in tcfg['download_output_data'])

    # Attach the download list to the task
    t.download_output_data = download_list

    # Return the task object
    return t
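
The example above relies on two helpers, match_options() and mvar_replace_dict(), that are defined elsewhere in the source repository. The sketch below shows one plausible implementation, assuming that task options are matched by name against the experiment-level options and that magic variables appear as %NAME% tokens in staged file names; both assumptions are illustrative, not taken from the original code.

def match_options(topts, eopts):
    # Hypothetical: build "--flag value" argument pairs for every task
    # option that has a value in the experiment-level options
    args = []
    for opt in topts:
        if opt in eopts:
            args.extend(['--{0}'.format(opt), str(eopts[opt])])
    return args


def mvar_replace_dict(mvars, fname):
    # Hypothetical: substitute each magic variable (e.g. %PIPELINE_ID%)
    # occurring in a staged file name with its current value
    for key, val in mvars.items():
        fname = fname.replace('%{0}%'.format(key), val)
    return fname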
Example no. 2
import radical.utils as ru
import radical.saga as rs

from radical.entk import Pipeline, Stage, Task


def generate_pipeline(cfg):

    cfg_file = cfg['run_cfg_file']  # resource and workload config
    run_file = cfg['run_file']  # runs for this campaign

    # set up the S1 workload: reload the full config (rebinding the cfg
    # argument) and find the runs that still need to execute
    # (check_runs() is defined elsewhere in the source repository)
    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    runs = check_runs(cfg_file, run_file)

    if not runs:
        print('S1: nothing to run, exiting.')
        return

    # for each run in the campaign:
    # - create a cfg with the requested receptor and smiles
    # - create a number of masters as EnTK tasks and add them to a pipeline
    # - submit the configured number of masters with that cfg

    # setup EnTK pipeline
    p = Pipeline()
    p.name = 'S1-RAPTOR'
    s = Stage()

    # enumerate results that already exist on the remote file system
    # (subs and ls feed the skip/recompute logic elided below)
    subs = dict()
    rurl = cfg.fs_url + cfg.workload.results
    d = rs.filesystem.Directory(rurl)
    ls = [str(u).split('/')[-1] for u in d.list()]

    workload = cfg.workload

    for receptor, smiles, nodes, runtime in runs:

        print('%30s  %s' % (receptor, smiles))
        name = '%s_-_%s' % (receptor, smiles)
        tgt = '%s.%s.gz' % (name, workload.output)
        # (commented-out skip/recompute logic elided: it consulted ls and
        # subs to skip runs whose results already exist, or to back up old
        # results when workload.recompute is set)

        cpn = cfg.cpn
        gpn = cfg.gpn
        n_masters = cfg.n_masters

        cfg.workload.receptor = receptor
        cfg.workload.smiles = smiles
        cfg.workload.name = name
        cfg.nodes = nodes
        cfg.runtime = runtime
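        # workers per master: each master's share of the nodes, minus one
        # (presumably reserving capacity for the master itself)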
        cfg.n_workers = int(nodes / n_masters - 1)
        print('n_workers: %d' % cfg.n_workers)

        ru.write_json(cfg, 'configs/wf0.%s.cfg' % name)

        for i in range(n_masters):
            t = Task()

            t.pre_exec = [
                '. /gpfs/alpine/scratch/mturilli1/med110/radical.pilot.sandbox/s1.to/bin/activate'
            ]

            t.executable = "python3"
            t.arguments = ['wf0_master.py', i]
            t.cpu_threads = cpn
            t.upload_input_data = [
                'wf0_master.py', 'wf0_worker.py',
                'configs/wf0.%s.cfg > wf0.cfg' % name, 'read_ligand_dict.py'
            ]
            t.link_input_data = ['%s > input_dir' % workload.input_dir]
            t.download_output_data = [
                '%s.%s.gz > results/%s.%s.gz' %
                (name, workload.output, name, workload.output)
            ]
            # (commented-out RADICAL-Pilot staging directives elided: they
            # expressed the same uploads, links, and downloads as the EnTK
            # attributes above, plus a link for workload.impress_dir)
            s.add_tasks(t)

    p.add_stages(s)

    return p
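
Neither example submits its pipeline: in EnTK that is the job of an AppManager. Below is a minimal submission sketch, assuming a local target resource; the resource description values are placeholders, not taken from the original code.

from radical.entk import AppManager

amgr = AppManager()  # older EnTK releases also need RabbitMQ hostname/port
amgr.resource_desc = {
    'resource': 'local.localhost',  # placeholder resource label
    'walltime': 60,                 # minutes
    'cpus': 4,
}
amgr.workflow = [generate_pipeline(cfg)]  # cfg as described above
amgr.run()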