def __init__(self):

    self.set_argparse()
    self._set_rmq()
    self.am = entk.AppManager(hostname=self.rmq_hostname,
                              port=self.rmq_port)
    self.p = entk.Pipeline()
    self.s = entk.Stage()
def __init__(self):

    self.set_argparse()
    self._set_rmq()
    self.am = entk.AppManager(hostname=self.rmq_hostname,
                              port=self.rmq_port,
                              username=self.rmq_username,
                              password=self.rmq_password)

    self.pipelines = []

    self.p1 = entk.Pipeline()
    self.p2 = entk.Pipeline()

    self.s1 = entk.Stage()
    self.s2 = entk.Stage()
    self.s3 = entk.Stage()
    self.s4 = entk.Stage()
    self.s5 = entk.Stage()
    self.s6 = entk.Stage()
    self.s7 = entk.Stage()
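# Minimal driver sketch (assumed, not from the original sources): it shows how
# the AppManager created in __init__ is typically wired to the pipelines.
# `Workflow` is a hypothetical name for the class above, and the resource_desc
# values are placeholders; stages and tasks are assumed to be populated
# elsewhere before submission.
if __name__ == '__main__':
    wf = Workflow()                       # hypothetical class name
    wf.am.resource_desc = {
        'resource': 'local.localhost',    # placeholder resource label
        'walltime': 30,
        'cpus': 4}
    wf.pipelines = [wf.p1, wf.p2]
    wf.am.workflow = wf.pipelines         # assign the workflow ...
    wf.am.run()                           # ... and execute it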
def generate_discover_pipe(self, filetype='csv', img_ftype='tif'):
    '''
    This function takes as input paths on Bridges and returns a pipeline
    that produces, for each path, a file listing all the images found
    under it.
    '''
    pipeline = re.Pipeline()
    pipeline.name = 'Disc'
    stage = re.Stage()
    stage.name = 'Disc.S0'

    if self._paths is None:
        raise RuntimeError('Images paths are not set.')

    # Create the module load list
    modules_load = list()
    if self._modules:
        for module in self._modules:
            tmp_load = 'module load %s' % module
            modules_load.append(tmp_load)

    tmp_pre_execs = ['unset PYTHONPATH']
    if self._pre_execs:
        tmp_pre_execs = tmp_pre_execs + modules_load + self._pre_execs
    else:
        tmp_pre_execs = tmp_pre_execs + modules_load

    for i in range(len(self._paths)):
        task = re.Task()
        task.name = 'Disc.T%d' % i
        task.pre_exec = tmp_pre_execs
        task.executable = 'python'  # Assign executable to the task
        task.arguments = ['image_disc.py',
                          '%s' % self._paths[i],
                          '--image_ftype=%s' % img_ftype,
                          '--filename=images%d' % i,
                          '--filetype=%s' % filetype,
                          '--filesize']
        # Download the discovery output; the extension follows the
        # requested filetype instead of being hardcoded to csv.
        task.download_output_data = ['images%d.%s' % (i, filetype)]
        task.upload_input_data = [os.path.dirname(os.path.abspath(__file__))
                                  + '/image_disc.py']
        task.cpu_reqs = {'cpu_processes': 1,
                         'cpu_process_type': '',
                         'cpu_threads': 1,
                         'cpu_thread_type': 'OpenMP'}
        stage.add_tasks(task)

    # Add Stage to the Pipeline
    pipeline.add_stages(stage)

    return pipeline
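# Minimal usage sketch (assumed, not from the original sources): `seals` is a
# hypothetical instance of the class that owns generate_discover_pipe(), and
# `appman` an already configured entk.AppManager.
discovery = seals.generate_discover_pipe(filetype='csv', img_ftype='tif')
appman.workflow = [discovery]
appman.run()
# Each discovery task downloads an images<i>.csv manifest that a later
# analysis step can read to decide which images to process.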
def _generate_pipeline(self, name, pre_execs, image, gpu_id):
    '''
    This function creates a pipeline for an image that will be analyzed.

    :Arguments:
        :name: Pipeline name, str
        :pre_execs: Commands to run before the task executable, list
        :image: Image path, str
        :gpu_id: ID of the GPU that runs the prediction, int
    '''
    # Create a Pipeline object
    entk_pipeline = re.Pipeline()
    entk_pipeline.name = name
    # Create a Stage object
    stage0 = re.Stage()
    stage0.name = '%s-S0' % (name)
    # Create the prediction task
    task1 = re.Task()
    task1.name = '%s-T0' % stage0.name
    task1.pre_exec = pre_execs
    task1.executable = 'iceberg_penguins.detect'  # Assign task executable
    # Assign arguments for the task executable
    task1.arguments = ['--gpu_ids', gpu_id,
                       '--name', self._model_name,
                       '--epoch', self._epoch,
                       '--checkpoints_dir', self._model_path,
                       '--output', self._output_path,
                       '--testset', 'GE',
                       '--input_im', image.split('/')[-1]]
    task1.link_input_data = ['%s' % image]
    task1.cpu_reqs = {'cpu_processes': 1, 'cpu_threads': 1,
                      'cpu_process_type': None, 'cpu_thread_type': 'OpenMP'}
    task1.gpu_reqs = {'gpu_processes': 1, 'gpu_threads': 1,
                      'gpu_process_type': None, 'gpu_thread_type': 'OpenMP'}

    stage0.add_tasks(task1)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage0)

    return entk_pipeline
def setup(self):

    # prepare input for all replicas
    writeInputs.writeInputs(max_temp=self._max_temp,
                            min_temp=self._min_temp,
                            replicas=self._en_size,
                            timesteps=self._timesteps,
                            basename=self._basename)

    # and tar it up
    tar = tarfile.open("input_files.tar", "w")
    for name in [self._basename + ".prmtop",
                 self._basename + ".inpcrd",
                 self._basename + ".mdin"]:
        tar.add(name)
    # add the per-replica input files generated by writeInputs, then remove
    # the local copies since only the tarball is staged
    for replica in self._replicas:
        tar.add('mdin-%s-0' % replica.rid)
        os.remove('mdin-%s-0' % replica.rid)
    tar.close()

    # create a single pipeline with one stage to transfer the tarball
    task = re.Task()
    task.name = 'untarTsk'
    task.executable = 'python'
    task.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    task.arguments = ['untar_input_files.py', 'input_files.tar']
    task.cpu_reqs = {'processes': 1,
                     'process_type': None,
                     'threads_per_process': 1,
                     'thread_type': None}
    task.post_exec = []

    stage = re.Stage()
    stage.name = 'untarStg'
    stage.add_tasks(task)

    setup_pipeline = re.Pipeline()
    setup_pipeline.name = 'untarPipe'
    setup_pipeline.add_stages(stage)

    return [setup_pipeline]
def setup_replicas(replicas, min_temp, max_temp, timesteps, basename):

    writeInputs.writeInputs(max_temp=max_temp, min_temp=min_temp,
                            replicas=replicas, timesteps=timesteps,
                            basename=basename)

    tar = tarfile.open("input_files.tar", "w")
    for name in [basename + ".prmtop",
                 basename + ".inpcrd",
                 basename + ".mdin"]:
        tar.add(name)
    for r in range(replicas):
        tar.add('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
        os.remove('mdin-{replica}-{cycle}'.format(replica=r, cycle=0))
    tar.close()

    setup_p = re.Pipeline()
    setup_p.name = 'untarPipe'

    untar_stg = re.Stage()
    untar_stg.name = 'untarStg'

    # Untar Task
    untar_tsk = re.Task()
    untar_tsk.name = 'untarTsk'
    untar_tsk.executable = ['python']
    untar_tsk.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    untar_tsk.arguments = ['untar_input_files.py', 'input_files.tar']
    # cpu_reqs expects a dict describing the resource request, not an int
    untar_tsk.cpu_reqs = {'processes': 1,
                          'process_type': None,
                          'threads_per_process': 1,
                          'thread_type': None}
    untar_tsk.post_exec = []

    untar_stg.add_tasks(untar_tsk)
    setup_p.add_stages(untar_stg)

    # sandbox of the untar task, where the unpacked input files will live
    replica_sandbox = '$Pipeline_%s_Stage_%s_Task_%s' \
                    % (setup_p.name, untar_stg.name, untar_tsk.name)

    return setup_p, replica_sandbox
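# Minimal usage sketch (assumed, not from the original sources): stage the
# shared replica inputs first, then build the per-replica MD pipelines
# against the sandbox path that setup_replicas() returns.  `appman` is an
# already configured entk.AppManager and 'ace-ala' an example basename.
setup_p, replica_sandbox = setup_replicas(replicas=4, min_temp=300,
                                          max_temp=320, timesteps=1000,
                                          basename='ace-ala')
appman.workflow = set([setup_p])
appman.run()   # wait until the tarball has been unpacked
# replica_sandbox now resolves to the untar task's directory and can be
# passed to replica_pipeline() so MD tasks can link prmtop/inpcrd from it.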
def setup(self):

    self._log.debug('=== data staging')

    # prepare input for all replicas
    writeInputs.writeInputs(max_temp=self._max_temp,
                            min_temp=self._min_temp,
                            replicas=self._size,
                            timesteps=self._timesteps,
                            basename=self._basename)

    # and tar it up
    tar = tarfile.open("input_files.tar", "w")
    for name in [self._basename + ".prmtop",
                 self._basename + ".inpcrd",
                 self._basename + ".mdin"]:
        tar.add(name)
    for replica in self._replicas:
        tar.add('mdin-%s-0' % replica.rid)
        os.remove('mdin-%s-0' % replica.rid)
    tar.close()

    # create a single pipeline with one stage to transfer the tarball
    task = re.Task()
    task.name = 'untarTsk'
    task.executable = 'python'
    task.upload_input_data = ['untar_input_files.py', 'input_files.tar']
    task.arguments = ['untar_input_files.py', 'input_files.tar']
    # cpu_reqs expects a dict describing the resource request, not an int
    task.cpu_reqs = {'processes': 1,
                     'process_type': None,
                     'threads_per_process': 1,
                     'thread_type': None}
    task.post_exec = []

    stage = re.Stage()
    stage.name = 'untarStg'
    stage.add_tasks(task)

    setup = re.Pipeline()
    setup.name = 'untarPipe'
    setup.add_stages(stage)

    # run the setup pipeline
    self.workflow = set([setup])
    self.run()
def get_wf3_input(appman, cfg):

    # Assuming a shared filesystem on the login node, this could be executed
    # by the script directly instead of through EnTK.
    p = entk.Pipeline()
    p.name = 'get_wf3_input'
    s = entk.Stage()

    t = entk.Task()
    t.executable = ['python3']
    t.arguments = ['gather.py',
                   '-f', cfg['outlier_path'],
                   '-p', cfg['top_path']]

    s.add_tasks(t)
    p.add_stages(s)

    appman.workflow = [p]
    appman.run()
def _generate_pipeline(self, name, pre_execs, image, image_size):
    '''
    This function creates a pipeline for an image that will be analyzed.

    :Arguments:
        :name: Pipeline name, str
        :pre_execs: Commands to run before each task executable, list
        :image: Image path, str
        :image_size: Image size in MBs, int
    '''
    # Create a Pipeline object
    entk_pipeline = re.Pipeline()
    entk_pipeline.name = name
    # Create a Stage object
    stage0 = re.Stage()
    stage0.name = '%s.S0' % (name)
    # Create Task 0, tiling
    task0 = re.Task()
    task0.name = '%s.T0' % stage0.name
    task0.pre_exec = pre_execs
    task0.executable = 'iceberg_seals.tiling'  # Assign task executable
    # Assign arguments for the task executable
    task0.arguments = ['--input_image=%s' % image.split('/')[-1],
                       '--output_folder=$NODE_LFS_PATH/%s' % task0.name,
                       '--bands=%s' % self._bands,
                       '--stride=%s' % self._stride,
                       '--patch_size=%s' % self._patch_size,
                       '--geotiff=%s' % self._geotiff]
    task0.link_input_data = [image]
    task0.cpu_reqs = {'cpu_processes': 1,
                      'cpu_threads': 4,
                      'cpu_process_type': None,
                      'cpu_thread_type': 'OpenMP'}
    task0.lfs_per_process = image_size

    stage0.add_tasks(task0)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage0)

    # Create a Stage object
    stage1 = re.Stage()
    stage1.name = '%s.S1' % (name)
    # Create Task 1, prediction
    task1 = re.Task()
    task1.name = '%s.T1' % stage1.name
    task1.pre_exec = pre_execs
    task1.executable = 'iceberg_seals.predicting'  # Assign task executable
    # Assign arguments for the task executable
    task1.arguments = ['--input_dir=$NODE_LFS_PATH/%s' % task0.name,
                       '--model_architecture=%s' % self._model_arch,
                       '--hyperparameter_set=%s' % self._hyperparam,
                       '--model_name=%s' % self._model_name,
                       '--models_folder=./',
                       '--output_dir=./%s' % image.split('/')[-1].split('.')[0]]
    task1.link_input_data = ['$SHARED/%s' % self._model_name]
    task1.cpu_reqs = {'cpu_processes': 1,
                      'cpu_threads': 1,
                      'cpu_process_type': None,
                      'cpu_thread_type': 'OpenMP'}
    task1.gpu_reqs = {'gpu_processes': 1,
                      'gpu_threads': 1,
                      'gpu_process_type': None,
                      'gpu_thread_type': 'OpenMP'}
    # Place the prediction task on the same node as the tiling task so it
    # can read the tiles from node-local storage.
    task1.tags = {'colocate': task0.name}

    stage1.add_tasks(task1)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage1)

    return entk_pipeline
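# Minimal driver sketch (assumed, not from the original sources): one
# pipeline per discovered image, all submitted to an already configured
# entk.AppManager `appman`.  `analysis` is a hypothetical instance of the
# class that owns _generate_pipeline(), `images` a list of (path, size_in_MB)
# tuples and `pre_execs` the environment-setup commands built elsewhere.
pipelines = []
for idx, (path, size) in enumerate(images):
    pipelines.append(analysis._generate_pipeline(name='P%d' % idx,
                                                 pre_execs=pre_execs,
                                                 image=path,
                                                 image_size=size))
appman.workflow = pipelines
appman.run()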
def _generate_pipeline(self, name, pre_execs, image, image_size):
    '''
    This function creates a pipeline for an image that will be analyzed.

    :Arguments:
        :name: Pipeline name, str
        :pre_execs: Commands to run before each task executable, list
        :image: Image path, str
        :image_size: Image size in MBs, int
    '''
    # Create a Pipeline object
    entk_pipeline = re.Pipeline()
    entk_pipeline.name = name
    # Create a Stage object
    stage0 = re.Stage()
    stage0.name = '%s.S0' % (name)
    # Create Task 0, tiling
    task0 = re.Task()
    task0.name = '%s.T0' % stage0.name
    task0.pre_exec = pre_execs
    task0.executable = 'iceberg_rivers.tiling'  # Assign task executable
    task0.arguments = ['--input=%s' % image.split('/')[-1],
                       '--output=$NODE_LFS_PATH/%s/' % task0.name,
                       '--tile_size=%s' % self._tile_size,
                       '--step=%s' % self._step]
    task0.link_input_data = [image]
    task0.cpu_reqs = {'cpu_processes': 1,
                      'cpu_threads': 4,
                      'cpu_process_type': None,
                      'cpu_thread_type': None}
    task0.lfs_per_process = image_size

    stage0.add_tasks(task0)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage0)

    # Create a Stage object
    stage1 = re.Stage()
    stage1.name = '%s.S1' % (name)
    # Create Task 1, prediction
    task1 = re.Task()
    task1.name = '%s.T1' % stage1.name
    task1.pre_exec = pre_execs
    task1.executable = 'iceberg_rivers.predicting'  # Assign task executable
    # Assign arguments for the task executable
    task1.arguments = ['--input=$NODE_LFS_PATH/%s/' % task0.name,
                       '--weights_path=%s' % self._weights_path,
                       '--output_folder=$NODE_LFS_PATH/%s/' % task1.name]
    task1.cpu_reqs = {'processes': 1,
                      'threads_per_process': 1,
                      'process_type': None,
                      'thread_type': None}
    task1.gpu_reqs = {'processes': 1,
                      'threads_per_process': 1,
                      'process_type': None,
                      'thread_type': None}
    # Place the prediction task on the same node as the tiling task so it
    # can read the tiles from node-local storage.
    task1.tags = {'colocate': task0.name}

    stage1.add_tasks(task1)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage1)

    # Create a Stage object
    stage2 = re.Stage()
    stage2.name = '%s.S2' % (name)
    # Create Task 2, mosaicking
    task2 = re.Task()
    task2.name = '%s.T2' % stage2.name
    task2.pre_exec = pre_execs
    task2.executable = 'iceberg_rivers.mosaic'  # Assign task executable
    # Assign arguments for the task executable
    task2.arguments = ['--input=$NODE_LFS_PATH/%s/' % task1.name,
                       '--input_WV=%s' % image.split('/')[-1],
                       '--tile_size=%s' % self._tile_size,
                       '--step=%s' % self._step,
                       '--output_folder=./']
    task2.cpu_reqs = {'processes': 1,
                      'threads_per_process': 1,
                      'process_type': None,
                      'thread_type': None}
    task2.link_input_data = [image]
    task2.tags = {'colocate': task0.name}

    stage2.add_tasks(task2)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage2)

    return entk_pipeline
def _generate_pipeline(self, name, pre_execs, image, image_size):
    '''
    This function creates a pipeline for an image that will be analyzed.

    :Arguments:
        :name: Pipeline name, str
        :pre_execs: Commands to run before each task executable, list
        :image: Image path, str
        :image_size: Image size in MBs, int
    '''
    # Create a Pipeline object
    entk_pipeline = re.Pipeline()
    entk_pipeline.name = name
    # Create a Stage object
    stage0 = re.Stage()
    stage0.name = '%s-S0' % (name)
    # Create Task 0, tiling
    task0 = re.Task()
    task0.name = '%s-T0' % stage0.name
    task0.pre_exec = pre_execs
    task0.executable = 'iceberg_seals.tiling'  # Assign task executable
    # Assign arguments for the task executable
    task0.arguments = ['--scale_bands=%s' % self._scale_bands,
                       '--input_image=%s' % image.split('/')[-1],
                       # This path points to the local filesystem of the
                       # node where the tiling of the image happened.
                       '--output_folder=$NODE_LFS_PATH/%s' % task0.name]
    task0.link_input_data = [image]
    task0.cpu_reqs = {'processes': 1,
                      'threads_per_process': 4,
                      'process_type': None,
                      'thread_type': 'OpenMP'}
    task0.lfs_per_process = image_size

    stage0.add_tasks(task0)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage0)

    # Create a Stage object
    stage1 = re.Stage()
    stage1.name = '%s-S1' % (name)
    # Create Task 1, prediction
    task1 = re.Task()
    task1.name = '%s-T1' % stage1.name
    task1.pre_exec = pre_execs
    task1.executable = 'iceberg_seals.predicting'  # Assign task executable
    # Assign arguments for the task executable
    task1.arguments = ['--input_image', image.split('/')[-1],
                       '--model_architecture', self._model_arch,
                       '--hyperparameter_set', self._hyperparam,
                       '--training_set', 'test_vanilla',
                       '--test_folder', '$NODE_LFS_PATH/%s' % task0.name,
                       '--model_path', './',
                       '--output_folder',
                       './%s' % image.split('/')[-1].split('.')[0]]
    task1.link_input_data = ['$SHARED/%s' % self._model_name]
    task1.cpu_reqs = {'processes': 1,
                      'threads_per_process': 1,
                      'process_type': None,
                      'thread_type': 'OpenMP'}
    task1.gpu_reqs = {'processes': 1,
                      'threads_per_process': 1,
                      'process_type': None,
                      'thread_type': 'OpenMP'}
    # Download resulting images
    task1.download_output_data = ['%s/ > %s'
                                  % (image.split('/')[-1].split('.')[0],
                                     image.split('/')[-1])]
    # Place the prediction task on the same node as the tiling task.
    task1.tag = task0.name

    stage1.add_tasks(task1)
    # Add Stage to the Pipeline
    entk_pipeline.add_stages(stage1)

    return entk_pipeline
def replica_pipeline(self, rid, cycle, replica_cores, md_executable,
                     timesteps, replica_sandbox):

    # ----------------------------------------------------------------------
    def add_md_stg(rid, cycle):
        # MD stage
        print('cycle: ', self.cycle)

        md_tsk = re.Task()
        md_stg = re.Stage()
        md_tsk.name = 'mdtsk-{replica}-{cycle}'.format(replica=rid,
                                                       cycle=self.cycle)
        md_tsk.link_input_data = [
            '%s/inpcrd > inpcrd-{replica}-{cycle}'.format(
                replica=rid, cycle=self.cycle) % replica_sandbox,
            '%s/prmtop' % replica_sandbox,
            '%s/mdin-{replica}-{cycle} > mdin'.format(
                replica=rid, cycle=self.cycle) % replica_sandbox]
        md_tsk.arguments = [
            '-O',
            '-i',   'mdin',
            '-p',   'prmtop',
            '-c',   'inpcrd-{replica}-{cycle}'.format(replica=rid,
                                                      cycle=self.cycle),
            '-o',   'out',
            '-x',   'mdcrd',
            '-r',   '%s/inpcrd-{replica}-{cycle}'.format(
                        replica=rid, cycle=self.cycle + 1) % replica_sandbox,
            '-inf', '%s/mdinfo-{replica}-{cycle}'.format(
                        replica=rid, cycle=self.cycle) % replica_sandbox]
        md_tsk.executable = [md_executable]
        md_tsk.cpu_reqs = {'processes': replica_cores,
                           'process_type': '',
                           'threads_per_process': 1,
                           'thread_type': None}
        md_tsk.pre_exec = ['echo $SHARED']

        md_stg.add_tasks(md_tsk)
        md_stg.post_exec = {'condition': post_md,
                            'on_true': start_ex,
                            'on_false': suspend_replica}
        return md_stg
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def add_ex_stg(rid, cycle):
        # exchange stage
        ex_tsk = re.Task()
        ex_stg = re.Stage()
        ex_tsk.name = 'extsk-{replica}-{cycle}'.format(replica=rid,
                                                       cycle=cycle)
        for rid in range(len(waiting_replicas)):
            ex_tsk.link_input_data += [
                '%s/mdinfo-{replica}-{cycle}'.format(
                    replica=rid, cycle=self.cycle) % replica_sandbox]

        ex_tsk.arguments = ['t_ex_gibbs.py',
                            len(waiting_replicas)]  # This needs to be fixed
        ex_tsk.executable = ['python']
        ex_tsk.cpu_reqs = {'processes': 1,
                           'process_type': '',
                           'threads_per_process': 1,
                           'thread_type': None}

        ex_stg.add_tasks(ex_tsk)
        ex_stg.post_exec = {'condition': post_ex,
                            'on_true': terminate_replicas,
                            'on_false': continue_md}
        return ex_stg
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def post_md():

        global replica_cycles

        print('replica cycles: %s [%s]' % (replica_cycles, rid))

        self.cycle += 1
        replica_cycles[rid] += 1

        print('replica cycles: %s' % replica_cycles)

        waiting_replicas.append(rid)
        if len(waiting_replicas) < max_waiting_list:
            return False
        return True
    # ----------------------------------------------------------------------

    def suspend_replica():
        p_replica.suspend()
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def start_ex():
        ex_stg = add_ex_stg(rid, cycle=self.cycle)
        p_replica.add_stages(ex_stg)
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def post_ex():
        if cycle > min_completed_cycles:
            return True
        return False
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def terminate_replicas():
        # Resume all replicas in the list without adding stages
        for rid in waiting_replicas:
            replica_pipelines[rid].resume()
        print("DONE")
    # ----------------------------------------------------------------------

    # ----------------------------------------------------------------------
    def continue_md():
        # This needs to resume replica_pipelines[rid] for all rids in the
        # wait list
        print("continuing replicas")

        global waiting_replicas

        for rid in waiting_replicas:
            try:
                md_stg = add_md_stg(rid, cycle)
                replica_pipelines[rid].add_stages(md_stg)
                if replica_pipelines[rid] is rid:
                    pass
                else:
                    replica_pipelines[rid].resume()
            # This throws an error: a pipeline cannot resume itself since it
            # is not suspended.  The pipeline triggering this choice is NOT
            # suspended, so pipeline.resume() fails.  This seems to be
            # happening on ALL pipelines somehow.
            except:
                print("replica is not suspended, cannot resume")

        waiting_replicas = []
    # ----------------------------------------------------------------------

    p_replica = re.Pipeline()
    p_replica.name = 'p_{rid}'.format(rid=rid)

    md_stg = add_md_stg(rid, cycle)
    p_replica.add_stages(md_stg)

    return p_replica
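# Minimal driver sketch (assumed, not from the original sources): build one
# pipeline per replica after the setup pipeline has staged the inputs, then
# hand the whole set to an already configured entk.AppManager `appman`.
# `exchange` is a hypothetical instance of the class that owns
# replica_pipeline(), `n_replicas` the ensemble size, 'sander' an example MD
# engine, and replica_pipelines / waiting_replicas / replica_cycles the
# module-level structures the nested callbacks above refer to.
replica_pipelines = []
for rid in range(n_replicas):
    replica_pipelines.append(
        exchange.replica_pipeline(rid=rid, cycle=0, replica_cores=1,
                                  md_executable='sander',
                                  timesteps=1000,
                                  replica_sandbox=replica_sandbox))
appman.workflow = set(replica_pipelines)
appman.run()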