示例 #1（评分: 0）
文件: pipeline.py（项目: Parsl/ceci）
 def __init__(self, launcher_config, stages):
     """Initialize the pipeline runner.

     Sets up empty stage bookkeeping, records the MPI launch command from
     the launcher configuration, builds the parsl DataFlowKernel, and
     registers every supplied stage via ``add_stage``.

     Parameters
     ----------
     launcher_config : dict
         Parsl launcher configuration; must contain an 'mpi_command' key.
     stages : iterable
         Stage descriptions, each passed to ``self.add_stage``.
     """
     # Per-stage execution settings and ordered stage names, filled in
     # by add_stage below.
     self.stage_execution_config = {}
     self.stage_names = []
     self.mpi_command = launcher_config['mpi_command']
     # The DataFlowKernel schedules and tracks all stage tasks.
     self.dfk = parsl.DataFlowKernel(launcher_config)
     for stage_info in stages:
         self.add_stage(stage_info)
示例 #2（评分: 0）
 def __init__(self, launcher_config, stages, log_dir, pycmd='python3'):
     """Initialize the pipeline runner.

     Records the log directory and python interpreter command, sets up
     empty stage bookkeeping, reads the MPI launch command from the
     launcher configuration, builds the parsl DataFlowKernel (logging
     into ``log_dir``), and registers every supplied stage.

     Parameters
     ----------
     launcher_config : dict
         Parsl launcher configuration; must contain an 'mpi_command' key.
     stages : iterable
         Stage descriptions, each passed to ``self.add_stage``.
     log_dir : str
         Directory used as the DataFlowKernel run/log directory.
     pycmd : str, optional
         Python interpreter used to launch stages (default 'python3').
     """
     self.log_dir = log_dir
     self.python_command = pycmd
     # Per-stage execution settings and ordered stage names, filled in
     # by add_stage below.
     self.stage_execution_config = {}
     self.stage_names = []
     self.mpi_command = launcher_config['mpi_command']
     # The DataFlowKernel schedules tasks and writes its logs to log_dir.
     self.dfk = parsl.DataFlowKernel(launcher_config, rundir=self.log_dir)
     for stage_info in stages:
         self.add_stage(stage_info)
示例 #3（评分: 0）
def main():
    """Drive the tokenizer workflow: batch subjects and run them through parsl.

    Builds a DataFlowKernel, then loops submitting batches of ``perBatch``
    items from ``subjectIter`` while keeping at most ``maxRunning`` jobs
    in flight, polling completed jobs with ``checkRunning`` until the
    iterator is exhausted and all jobs have finished.

    Fixes over the previous version:
    - Stops batch-building once the data iterator is exhausted; previously
      an empty batch caused ``batch[0]`` to raise IndexError (and the inner
      submit loop could spin forever).
    - ``dfk.cleanup()`` now runs on every exit path (normal, interrupt, or
      error) via ``finally``; previously a non-KeyboardInterrupt exception
      leaked the DFK.
    """
    display("Loading DFK")
    parsl.set_file_logger("parsl.log", level=logging.DEBUG)

    dfk = parsl.DataFlowKernel(config=parsl_configs.rccNodeExclusive)

    display("Loading App")
    full_app = gen_full_tokenizer(dfk)

    display("Loading data iter")
    datIter = subjectIter()

    display("Starting run")

    running = {}
    done = False
    succCount = 0
    doneIter = False
    try:
        while not done:
            # Only add maxRunning jobs to the queue at once; stop
            # submitting as soon as the data iterator is exhausted.
            while len(running) < maxRunning and not doneIter:
                batch = []
                for _ in range(perBatch):
                    try:
                        batch.append(next(datIter))
                    except StopIteration:
                        # No more input; a partial batch may still be valid.
                        doneIter = True
                        break
                if not batch:
                    # Iterator exhausted before yielding anything for this
                    # batch — nothing to submit.
                    break
                # Batches are keyed by the WOS id of their first record.
                batchName = batch[0]['wos_id']
                running[batchName] = full_app(batch)
            succCount += checkRunning(running)
            display("Completed {}".format(succCount))
            # End the loop if all jobs are done and no more can be added
            if doneIter and len(running) < 1:
                done = True

    except KeyboardInterrupt:
        display("Closing down")
        raise
    except BaseException:
        # Restore stdout before propagating any other error so the
        # traceback is visible.
        resetStdout()
        raise
    finally:
        # Always release DFK resources, whatever the exit path.
        dfk.cleanup()

    display("Done")
import parsl

# Local execution setup: a single-worker thread pool, so apps run one at a
# time in-process (useful for debugging / small runs).
workers = parsl.ThreadPoolExecutor(max_workers=1)
# DataFlowKernel wired to the local thread-pool executor above.
dfk = parsl.DataFlowKernel(executors=[workers])

# Alternative cluster configuration (Cori), kept for reference:
#from config.cori import config
#dfk = parsl.DataFlowKernel(config=config)