def _add_output_deps(self, executor, args, kwargs, app_fut, func):
    """Wrap each output File of a task in a DataFuture and arrange stage-out.

    For every File in ``kwargs['outputs']`` (unless staging is inhibited),
    the File is replaced in the outputs list by a clean copy which the
    staging code may mutate, a stage-out is submitted via the data manager,
    and a DataFuture tracking the original File is appended to
    ``app_fut._outputs``.

    Returns ``func``, possibly replaced by a stage-out wrapper supplied by
    the data manager.
    """
    logger.debug("Adding output dependencies")
    outputs = kwargs.get('outputs', [])
    app_fut._outputs = []

    for pos, out_file in enumerate(outputs):
        if not (isinstance(out_file, File) and not self.check_staging_inhibited(kwargs)):
            logger.debug("Not performing staging for: {}".format(repr(out_file)))
            app_fut._outputs.append(DataFuture(app_fut, out_file, tid=app_fut.tid))
            continue

        # Replace the File with a DataFuture - either completing when the
        # stage-out future completes, or if no stage-out future is returned,
        # then when the app itself completes.  The staging code gets a clean
        # copy which it is allowed to mutate, while the DataFuture-contained
        # original is never modified by staging.
        working_copy = out_file.cleancopy()
        outputs[pos] = working_copy
        logger.debug("Submitting stage out for output file {}".format(repr(out_file)))
        stageout_fut = self.data_manager.stage_out(working_copy, executor, app_fut)

        if stageout_fut:
            logger.debug("Adding a dependency on stageout future for {}".format(repr(out_file)))
            parent_fut = stageout_fut
        else:
            logger.debug("No stageout dependency for {}".format(repr(out_file)))
            parent_fut = app_fut
        app_fut._outputs.append(DataFuture(parent_fut, out_file, tid=app_fut.tid))

        # This is a hook for post-task stage-out.  Note that nothing depends
        # on the output - which is maybe a bug in the not-very-tested
        # stage-out system?
        replacement = self.data_manager.replace_task_stage_out(working_copy, func, executor)
        if replacement:
            func = replacement

    return func
def __call__(self, *args, **kwargs):
    """Handle the call to a python app.

    Args:
        - Arbitrary positional and keyword arguments, forwarded to the
          wrapped function.

    Returns:
        If outputs=[...] was a kwarg: (App_fut, [Data_Futures...]),
        otherwise just App_fut.
    """
    # FIX: both branches of the original performed an identical
    # ``self.executor.submit(self.func, *args, **kwargs)`` call; only the
    # log message differed.  The duplicated submit is collapsed here, and
    # the type test uses ``isinstance`` per convention.
    if isinstance(self.executor, DataFlowKernel):
        logger.debug("Submitting to DataFlowKernel : %s", self.executor)
    else:
        logger.debug("Submitting to Executor: %s", self.executor)
    app_fut = self.executor.submit(self.func, *args, **kwargs)

    out_futs = [DataFuture(app_fut, o, parent=app_fut)
                for o in kwargs.get('outputs', [])]
    if out_futs:
        return app_fut, out_futs
    return app_fut
def stage_in(self, file, site_name=None):
    """Transport the file from the site of origin to the site.

    This function returns a DataFuture.

    Args:
        - self
        - file (File) - file to stage in
        - site_name (str) - a name of a site the file is going to be
          staged in to.  If the site argument is not specified for a file
          with 'globus' scheme, the file will be staged in to the first
          site with the "globus" key in a config.

    Raises:
        ValueError: if the file's scheme is not supported.
    """
    if file.scheme == 'file':
        # Local files are keyed under a None site name.
        # NOTE(review): assumes set_data_future accepts a None site key -
        # confirm against the File implementation.
        site_name = None
        f = self.submit(self._file_transfer_in, file)
    elif file.scheme == 'globus':
        globus_ep = self._get_globus_site(site_name)
        site_name = globus_ep['site_name']
        # Reuse an existing (possibly in-flight) transfer for this site.
        df = file.get_data_future(site_name)
        if df:
            return df
        f = self.submit(self._globus_transfer_in, file, globus_ep)
    else:
        # FIX: the original fell through with ``f`` unbound for any other
        # scheme, raising an opaque NameError; fail with a clear message.
        raise ValueError("Unsupported file scheme: {}".format(file.scheme))

    from parsl.app.futures import DataFuture
    df = DataFuture(f, file)
    # FIX: the original unconditionally used ``globus_ep['site_name']``
    # here, which raised NameError for 'file'-scheme files because
    # globus_ep is only assigned on the globus path.  Use the per-scheme
    # site key computed above.
    file.set_data_future(df, site_name)
    return df
def __call__(self, *args, **kwargs):
    """Handle the call to a python app.

    Args:
        - Arbitrary positional and keyword arguments.

    Returns:
        App_fut, with a DataFuture per declared output attached as
        ``_outputs``.
    """
    app_fut = self.executor.submit(self.func,
                                   *args,
                                   parsl_sites=self.sites,
                                   fn_hash=self.fn_hash,
                                   cache=self.cache,
                                   **kwargs)

    declared_outputs = kwargs.get('outputs', [])
    out_futs = []
    for output in declared_outputs:
        out_futs.append(DataFuture(app_fut, output,
                                   parent=app_fut, tid=app_fut.tid))
    app_fut._outputs = out_futs
    return app_fut
def __call__(self, *args, **kwargs):
    """Handle the call to a Bash app.

    Args:
        - Arbitrary positional and keyword arguments; kwargs are merged
          into the app-definition kwargs before submission.

    Returns:
        App_fut, with a DataFuture per declared output attached as
        ``_outputs``.
    """
    # FIX: removed the unused local ``trace_method = False`` (dead code).
    # Update kwargs in the app definition with ones passed in at calltime.
    self.kwargs.update(kwargs)
    app_fut = self.executor.submit(remote_side_bash_executor,
                                   self.func,
                                   *args,
                                   parsl_sites=self.sites,
                                   **self.kwargs)
    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    logger.debug("App[%s] assigned Task_id:[%s]", self.func.__name__, app_fut.tid)

    out_futs = [DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
                for o in kwargs.get('outputs', [])]
    app_fut._outputs = out_futs
    return app_fut
def __call__(self, *args, **kwargs):
    """Handle the call to a Bash app.

    Args:
        - Arbitrary positional and keyword arguments.

    Returns:
        App_fut, with a DataFuture per declared output attached as
        ``_outputs``.
    """
    # Merge call-time kwargs into the app-definition kwargs.
    self.kwargs.update(kwargs)

    # Use the explicitly configured DFK if there is one, otherwise the
    # globally loaded one.
    if self.data_flow_kernel is None:
        dfk = DataFlowKernelLoader.dfk()
    else:
        dfk = self.data_flow_kernel

    app_fut = dfk.submit(wrap_error(remote_side_bash_executor),
                         self.func,
                         *args,
                         executors=self.executors,
                         fn_hash=self.func_hash,
                         cache=self.cache,
                         **self.kwargs)

    app_fut._outputs = [DataFuture(app_fut, f, parent=app_fut, tid=app_fut.tid)
                        for f in kwargs.get('outputs', [])]
    return app_fut
def __call__(self, *args, **kwargs):
    """This is where the call to a python app is handled.

    Args:
        - Arbitrary positional and keyword arguments.

    Returns:
        If outputs=[...] was a kwarg then: App_fut, [Data_Futures...]
        else: App_fut
    """
    # FIX: the original assigned ``self.data_flow_kernel`` here as a side
    # effect of calling, permanently binding the app to whichever DFK
    # happened to be loaded on the first call.  Use a local instead, as the
    # sibling app implementations do.
    if self.data_flow_kernel is None:
        dfk = DataFlowKernelLoader.dfk()
    else:
        dfk = self.data_flow_kernel

    app_fut = dfk.submit(self.func, *args,
                         executors=self.executors,
                         fn_hash=self.func_hash,
                         cache=self.cache,
                         **kwargs)

    out_futs = [DataFuture(app_fut, o, parent=app_fut, tid=app_fut.tid)
                for o in kwargs.get('outputs', [])]
    app_fut._outputs = out_futs
    return app_fut
def __call__(self, *args, **kwargs):
    """This is where the call to a python app is handled.

    Args:
        - Arbitrary positional and keyword arguments.

    Returns:
        App_fut
    """
    if self.data_flow_kernel is None:
        dfk = DataFlowKernelLoader.dfk()
    else:
        dfk = self.data_flow_kernel

    # FIX: the original did ``self.func = timeout(self.func, walltime)``,
    # re-wrapping the function with a NEW timeout layer on every call and
    # permanently mutating the app object.  Wrap into a local so repeated
    # calls do not nest timeout wrappers.
    func = self.func
    walltime = self.kwargs.get('walltime')
    if walltime is not None:
        func = timeout(func, walltime)

    app_fut = dfk.submit(func, *args,
                         executors=self.executors,
                         fn_hash=self.func_hash,
                         cache=self.cache,
                         **kwargs)

    out_futs = [DataFuture(app_fut, o, tid=app_fut.tid)
                for o in kwargs.get('outputs', [])]
    app_fut._outputs = out_futs
    return app_fut
def __call__(self, *args, **kwargs):
    """Handle the call to a Bash app.

    Args:
        - Arbitrary positional and keyword arguments.

    Returns:
        App_fut, with a DataFuture per declared output attached as
        ``_outputs``.
    """
    # Fold call-time kwargs into the app-definition kwargs.
    self.kwargs.update(kwargs)

    app_fut = self.executor.submit(remote_side_bash_executor,
                                   self.func,
                                   *args,
                                   parsl_sites=self.sites,
                                   fn_hash=self.fn_hash,
                                   cache=self.cache,
                                   **self.kwargs)

    output_files = kwargs.get('outputs', [])
    app_fut._outputs = [DataFuture(app_fut, f, parent=app_fut, tid=app_fut.tid)
                        for f in output_files]
    return app_fut
def optionally_stage_in(self, input, func, executor):
    """Optionally stage in one task input.

    If ``input`` is a DataFuture or File, stage it in and return a
    ``(replacement_input, func)`` pair where ``func`` may have been
    replaced by a staging wrapper; any other value is passed through
    unchanged.
    """
    if isinstance(input, DataFuture):
        staged_file = input.file_obj.cleancopy()
        # Replace the input DataFuture with a new DataFuture which
        # completes at the same time as the original one, but contains
        # the newly copied file.
        wrapped_input = DataFuture(input, staged_file, tid=input.tid)
    elif isinstance(input, File):
        staged_file = input.cleancopy()
        wrapped_input = staged_file
    else:
        # Not a stageable value - pass it through untouched.
        return (input, func)

    replacement_input = self.stage_in(staged_file, wrapped_input, executor)
    new_func = self.replace_task(staged_file, func, executor)
    return (replacement_input, new_func)
def _add_output_deps(self, executor, args, kwargs, app_fut, func):
    """Attach a DataFuture for each declared output and arrange stage-out.

    For every File in ``kwargs['outputs']`` (unless staging is inhibited),
    a stage-out is submitted via the data manager and a DataFuture tracking
    the File is appended to ``app_fut._outputs``.  Returns ``func``,
    possibly replaced by a stage-out wrapper from the data manager.
    """
    logger.debug("Adding output dependencies")
    app_fut._outputs = []

    for out_file in kwargs.get('outputs', []):
        stageable = (isinstance(out_file, File)
                     and not self.check_staging_inhibited(kwargs))
        if not stageable:
            logger.debug("Not performing staging for: {}".format(out_file))
            app_fut._outputs.append(DataFuture(app_fut, out_file, tid=app_fut.tid))
            continue

        # Replace a File with a DataFuture - either completing when the
        # stage-out future completes, or if no stage-out future is
        # returned, then when the app itself completes.
        logger.debug("Submitting stage out for output file {}".format(out_file))
        stageout_fut = self.data_manager.stage_out(out_file, executor, app_fut)
        if stageout_fut:
            logger.debug("Adding a dependency on stageout future for {}".format(out_file))
            app_fut._outputs.append(DataFuture(stageout_fut, out_file, tid=app_fut.tid))
        else:
            logger.debug("No stageout dependency for {}".format(out_file))
            app_fut._outputs.append(DataFuture(app_fut, out_file, tid=app_fut.tid))

        # This is a hook for post-task stage-out.  Note that nothing
        # depends on the output - which is maybe a bug in the
        # not-very-tested stage-out system?
        replacement = self.data_manager.replace_task_stage_out(out_file, func, executor)
        if replacement:
            func = replacement

    return func
def __call__(self, *args, **kwargs):
    """Handle the call to a Bash app.

    Args:
        - Arbitrary positional and keyword arguments; kwargs are merged
          into the app-definition kwargs before submission.

    Returns:
        If outputs=[...] was a kwarg: (App_fut, [Data_Futures...]),
        otherwise just App_fut.
    """
    cmd_line = self._trace_cmdline(*args, **kwargs)
    self.kwargs.update(kwargs)
    self.executable = cmd_line

    # FIX: both branches of the original performed an identical
    # ``submit(bash_executor, cmd_line, *args, **self.kwargs)`` call; only
    # the log message differed.  The duplicated submit and the
    # commented-out dead code are removed, and ``isinstance`` replaces the
    # ``type() ==`` comparison.
    if isinstance(self.executor, DataFlowKernel):
        logger.debug("Submitting to DataFlowKernel : %s", self.executor)
    else:
        logger.debug("Submitting to Executor: %s", self.executor)
    app_fut = self.executor.submit(bash_executor, cmd_line, *args, **self.kwargs)

    out_futs = [DataFuture(app_fut, o, parent=app_fut)
                for o in kwargs.get('outputs', [])]
    if out_futs:
        return app_fut, out_futs
    return app_fut
''' Submit a staging request. ''' return self.executor.submit(*args, **kwargs) def scale_in(self, blocks, *args, **kwargs): pass def scale_out(self, *args, **kwargs): pass def shutdown(self, block=False): return self.executor.shutdown(wait=block) def scaling_enabled(self): return self._scaling_enabled if __name__ == "__main__": from parsl.data_provider.files import File from parsl.app.futures import DataFuture dm = DataManager(config={'a': 1}) f = File("/tmp/a.txt") print(type(f), f) fut = dm.submit(f.stage_in, "foo") df = DataFuture(fut, f, parent=None, tid=None) print(df)