Example #1
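# Module-level imports assumed by this excerpt (a sketch; the exact pyhrf
# helper modules providing remote_copy and save_treatment are not shown here):
#     import os
#     import os.path as op
#     from os.path import basename
#     import pyhrf
#     from soma_workflow.client import Job, Group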
def prepare_treatment_jobs(treatment, tmp_local_dir, local_result_path,
                           local_user, local_host, remote_host, remote_user,
                           remote_path, label_for_cluster):
    """
    Prepare soma-workflow jobs to perform one treatment (i.e. one subject).

    Args:
        treatment (FMRITreatment): the treatment defining the analysis
        tmp_local_dir (str): a path where to store the temporary config file
                             before sending it to the remote host
        local_result_path (str): path where to store the final result
        local_user (str): the user on the local host who enables SSH connections
                          from the remote cluster
        local_host (str): local host (used to send back the result)
        remote_host (str): remote machine where the treatment will be run
        remote_user (str): user login on the remote machine.
        remote_path (str): path on the remote machine where to store ROI data and
                           analysis results
        label_for_cluster (str): label prefix used to name jobs in soma-workflow

    Returns:
        a tuple (job_split, jobs, dependencies, mainGroup)

            job_split (Job): job handling the splitting of input data into
                             ROI data
            jobs (list of Job): all jobs except the splitting job
                                -> ROI analyses, result merge,
                                   scp of results back to the local host,
                                   data cleaning
            dependencies (list of Job pairs): define the pipeline structure
            mainGroup (Group): top-level object gathering all jobs for
                               this treatment.
    """

    # roiFiles contains the list of files that will be produced by job_split
    roiFiles, roiIds = treatment.dump_roi_datasets(dry=True)

    pyhrf.verbose(1, 'Get list of split data files ... %d files' \
                      % len(roiFiles))
    datafiles = treatment.get_data_files()

    # Make all path be relative in the treatment config file
    # so that data file can be found on the cluster file system
    treatment.replace_data_dir('./')
    remote_cfg_file = op.join(tmp_local_dir, 'detectestim_remote.xml')
    treatment.set_init_param('make_outputs', False)
    pyhrf.verbose(1, 'Save remote treatment to %s' %remote_cfg_file)
    save_treatment(treatment, remote_cfg_file)

    pyhrf.verbose(1, 'Upload input data')
    # All data which are the inputs of the workflow:
    data_to_upload = datafiles+[remote_cfg_file]
    remote_input_files = remote_copy(data_to_upload, remote_host,
                                     remote_user, remote_path)
    pyhrf.verbose(1, 'Remove tmp remote cfg file')
    os.remove(remote_cfg_file)

    pyhrf.verbose(1, 'Prepare jobs ...')
    pyhrf.verbose(1, 'Job split ...')
    verbosity = pyhrf.verbose.verbosity
    cmd = ["pyhrf_split_roidata","-c", basename(remote_cfg_file),
           "-v %d" %verbosity, "-d", "./"]
    pyhrf.verbose(2, '-> %s' %cmd)
    job_split = Job(cmd, working_directory=remote_path, name="roi_split")

    pyhrf.verbose(1, 'Jobs JDE ...')
    jobs_jde = [Job(["pyhrf_jde_estim","-c", basename(remote_cfg_file),
                     "-r", basename(roiFile), "-v %d" %verbosity],
                    working_directory=remote_path,
                    name="jde_r%04d" %roiId)
                for roiFile, roiId in zip(roiFiles, roiIds)]
    pyhrf.verbose(2, 'First jde job -> %s' %jobs_jde[0].command)
    # Files produced by all JDE jobs, which will be then used as input of the
    # merge job:
    resultFiles = ["result_%04d.pck" %iroi for iroi in roiIds]

    pyhrf.verbose(1, 'Job pack result ...')
    # Output of the merge job, which has to be transferred back to local:
    remote_resultFile = './result.pck'
    pyhrf.verbose(1, 'Remote result file: %s' %remote_resultFile)

    cmd = ["pyhrf_pack_results",'-v1','-o',remote_resultFile]+resultFiles
    pyhrf.verbose(3, 'cmd pack result: %s' %cmd)
    job_merge = Job(cmd, working_directory=remote_path,
                    name="merge_results")


    # Retrieve result file:
    cmd = ["scp","-C",remote_resultFile, "%s@%s:\"%s\"" \
               %(local_user,local_host,local_result_path)]           
    
    pyhrf.verbose(2, 'cmd scp result: %s' %cmd)
    job_scp_result = Job(cmd, working_directory=remote_path, name="scp_result")

    # Clean everything:
    # -> all input files, split ROI data, per-ROI results, merged result:
    cmd = ["rm", "-f", remote_resultFile] + \
          map(basename, roiFiles) + resultFiles + remote_input_files
    pyhrf.verbose(3, 'cmd clean: %s' %cmd)
    job_clean = Job(cmd, working_directory=remote_path, name="clean_files")

    pyhrf.verbose(1, 'Setup of workflow ...')

    # Build the Job lists, dependencies and group
    clean = True
    if clean:
        nodes = [job_merge,job_scp_result,job_clean] + jobs_jde
    else:
        nodes = [job_merge,job_scp_result] + jobs_jde
    dependencies = []
    for jj in jobs_jde:
        dependencies.append((job_split,jj))
        dependencies.append((jj,job_merge))
    dependencies.append((job_merge,job_scp_result))
    if clean:
        dependencies.append((job_scp_result,job_clean))

    jjGroup = Group(elements=jobs_jde, name=label_for_cluster+'-roi_jobs')
    if clean:
        elements = [job_split,jjGroup,job_merge,
                    job_scp_result,job_clean]
    else:
        elements = [job_split,jjGroup,job_merge,
                    job_scp_result]
    mainGroup = Group(name=label_for_cluster,
                      elements=elements)

    return job_split, nodes, dependencies, mainGroup
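
The returned tuple plugs into a soma-workflow Workflow for submission. Below is
a minimal usage sketch, assuming the public soma_workflow.client API; the
treatment instance, hosts and paths are placeholders, not values from this
excerpt:

from soma_workflow.client import Workflow, WorkflowController, Helper

# `treatment` is assumed to be an existing FMRITreatment instance
job_split, jobs, dependencies, main_group = prepare_treatment_jobs(
    treatment, '/tmp/pyhrf_tmp', '/home/me/results/result.pck',
    'me', 'my_laptop', 'cluster.example.org', 'me',
    '/scratch/me/pyhrf', 'subject01')

# The split job plus all other jobs form the workflow; dependencies encode
# split -> per-ROI estimation -> merge -> scp -> clean.
wf = Workflow(jobs=[job_split] + jobs, dependencies=dependencies,
              root_group=[main_group])
controller = WorkflowController()  # assumes a configured computing resource
wf_id = controller.submit_workflow(workflow=wf, name='subject01')
Helper.wait_workflow(wf_id, controller)  # block until all jobs complete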
Example #2
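    # Module-level imports assumed by this method's body (not shown in the
    # excerpt): sys, os, os.path as op, time, shutil, itertools, cPickle,
    # pyhrf, and pyhrf's I/O helpers imported as `pio`.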
    def run(self, parallel=None, n_jobs=None):
        """
        Run the analysis: load data, run estimation, output results.
        """
        if parallel is None:
            result = self.execute()
        elif parallel == 'local':
            cfg_parallel = pyhrf.cfg['parallel-local']
            try:
                from joblib import Parallel, delayed
            except ImportError:
                print 'Cannot import joblib. It is required to enable '\
                    'parallel processing on a local machine.'
                sys.exit(1)

            parallel_verb = pyhrf.verbose.verbosity
            if pyhrf.verbose.verbosity == 6:
                parallel_verb = 10

            if n_jobs is None:
                n_jobs = cfg_parallel['nb_procs']

            p = Parallel(n_jobs=n_jobs, verbose=parallel_verb)
            result = p(delayed(exec_t)(t) for t in self.split())
            # join list of lists:
            result = list(itertools.chain.from_iterable(result))
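            # NB: exec_t is not defined in this excerpt; it is assumed to be
            # a module-level helper (joblib under Python 2 cannot pickle
            # bound methods), along the lines of:
            #     def exec_t(t):
            #         return t.execute()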

        elif parallel == 'LAN':

            from pyhrf import grid
            cfg_parallel = pyhrf.cfg['parallel-LAN']
            remoteUser = cfg_parallel['user']

            #1. Some checks on input/output directory
            remoteDir = cfg_parallel['remote_path']
            # At the end, results will be retrieved directly from remoteDir,
            # which therefore has to be readable
            if remoteDir is None or not op.exists(remoteDir):
                raise Exception('Remote directory is not readable (%s)' \
                                    %remoteDir)

            # Try if remoteDir is writeable, so that we don't need to upload
            # data via ssh
            remote_writeable = False
            if os.access(remoteDir, os.W_OK):
                remote_writeable = True
                tmpDir = remoteDir
            else:
                pyhrf.verbose(1, 'Remote dir is not writeable -> using tmp ' \
                                  'dir to store split data & then upload.')
                tmpDir = pyhrf.get_tmp_path()

            #2. split roi data
            pyhrf.verbose(1, 'Path to store sub treatments: %s' %tmpDir)
            treatments_dump_files = []
            self.split(dump_sub_results=True, output_dir=tmpDir,
                       make_sub_outputs=False,
                       output_file_list=treatments_dump_files)

            #3. copy data to remote directory
            if not remote_writeable:
                host = cfg_parallel['remote_host']
                pyhrf.verbose(1, 'Uploading data to %s ...' %(remoteDir))
                remote_input_files = pio.remote_copy(treatments_dump_files,
                                                     host, remoteUser, remoteDir)

            #4. create job list
            tasks_list = []
            for f in treatments_dump_files:
                f = op.join(remoteDir,op.basename(f))
                nice = cfg_parallel['niceness']
                tasks_list.append('nice -n %d %s -v%d -t "%s"' \
                                      %(nice,'pyhrf_jde_estim',
                                        pyhrf.verbose.verbosity,f))
            mode = 'dispatch'
            tasks = grid.read_tasks(';'.join(tasks_list), mode)
            timeslot = grid.read_timeslot('allday')
            hosts = grid.read_hosts(cfg_parallel['hosts'])
            brokenfile = op.join(tmpDir, 'pyhrf-broken_cmd.batch')
            logfile = op.join(self.output_dir, 'pyhrf-parallel.log')
            pyhrf.verbose(1, 'Log file for process dispatching: %s' \
                              %logfile)
            #5. launch them
            pyhrf.verbose(1, 'Dispatching processes ...')
            try:
                grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                            logfile, user=remoteUser)
                grid.kill_threads()
            except KeyboardInterrupt:
                grid.quit(None, None)

            if len(open(brokenfile).readlines()) > 0:
                pyhrf.verbose(1, 'There are some broken commands, '\
                                  'trying again ...')
                try:
                    tasks = grid.read_tasks(brokenfile, mode)
                    grid.run_grid(mode, hosts, 'rsa', tasks, timeslot, brokenfile,
                                logfile, user=remoteUser)
                    grid.kill_threads()
                except KeyboardInterrupt:
                    grid.quit(None, None)

            # TODO: check that everything went fine; if remoteDir is not
            # directly readable from the local host, results would have to be
            # retrieved via scp instead.
            #6. merge all results and create outputs
            result = []
            nb_treatments = len(treatments_dump_files)
            remote_result_files = [op.join(remoteDir, 'result_%04d.pck' %i) \
                                    for i in range(nb_treatments)]
            pyhrf.verbose(1, 'remote_result_files: %s' % str(remote_result_files))
            nres = len(filter(op.exists,remote_result_files))
            if nres == nb_treatments:
                pyhrf.verbose(1, 'Grabbing results ...')
                for fnresult in remote_result_files:
                    fresult = open(fnresult)
                    result.append(cPickle.load(fresult))
                    fresult.close()
            else:
                print 'Found only %d result files (expected %d)' \
                    %(nres, nb_treatments)
                print 'Something went wrong, check the log files'
            if not remote_writeable:
                pyhrf.verbose(1, 'Cleaning tmp dir (%s)...' %tmpDir)
                shutil.rmtree(tmpDir)
                pyhrf.verbose(1, 'Cleaning up remote dir (%s) through ssh ...' \
                                %remoteDir)
                cmd = 'ssh %s@%s rm -f %s %s' \
                    % (remoteUser, host, ' '.join(remote_result_files),
                       ' '.join(remote_input_files))
                pyhrf.verbose(2, cmd)
                os.system(cmd)
            else:
                pyhrf.verbose(1, 'Cleaning up remote dir (%s)...' %remoteDir)
                for f in os.listdir(remoteDir):
                    os.remove(op.join(remoteDir,f))

        elif parallel == 'cluster':

            from pyhrf.parallel import run_soma_workflow
            cfg = pyhrf.cfg['parallel-cluster']
            #create tmp remote path:
            date_now = time.strftime('%c').replace(' ','_').replace(':','_')
            remote_path = op.join(cfg['remote_path'], date_now)
            pyhrf.verbose(1,'Create tmp remote dir: %s' %remote_path)
            pio.remote_mkdir(cfg['server'], cfg['user'], remote_path)
            t_name = 'default_treatment'
            tmp_dir = pyhrf.get_tmp_path()
            label_for_cluster = self.analyser.get_label()
            if self.output_dir is None:
                out_dir = pyhrf.get_tmp_path()
            else:
                out_dir = self.output_dir
            result = run_soma_workflow({t_name:self}, 'pyhrf_jde_estim',
                                       {t_name:tmp_dir}, cfg['server_id'],
                                       cfg['server'], cfg['user'],
                                       {t_name:remote_path},
                                       {t_name:op.abspath(out_dir)},
                                       label_for_cluster, wait_ending=True)

        else:
            raise Exception('Parallel mode "%s" not available' %parallel)

        pyhrf.verbose(1, 'Retrieved %d results' %len(result))
        return self.output(result, (self.result_dump_file is not None),
                           self.make_outputs)
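
For reference, a sketch of how run() would be called for each mode; `treatment`
stands for an already-built FMRITreatment and the argument values are
placeholders:

outputs = treatment.run()                            # sequential, in-process
outputs = treatment.run(parallel='local', n_jobs=4)  # joblib on this machine
outputs = treatment.run(parallel='LAN')              # dispatch over SSH hosts
outputs = treatment.run(parallel='cluster')          # soma-workflow submission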