# Note: this method relies on `multiprocessing` (imported at module level)
# and on a `parallel_map` helper -- a multiprocessing-backed map that accepts
# an optional `processes` count -- provided elsewhere in the package.
def run(self, **kwargs):
    """
    Do all the computation here.
    :return: None. Results are stored in self.results, one per asset.
    """
    if self.logger:
        self.logger.info(
            "For each asset, if {type} result has not been generated, run "
            "and generate {type} result...".format(type=self.executor_id))

    parallelize = kwargs.get('parallelize', False)
    assert isinstance(parallelize, bool)

    processes = kwargs.get('processes', None)
    if processes is not None:
        assert parallelize is True, 'Cannot specify processes if parallelize is False.'
    assert processes is None or (isinstance(processes, int) and processes >= 1)

    if parallelize:
        # create locks for unique assets (uniqueness is identified by
        # str(asset)), so that two runs on the same asset never overlap
        map_asset_lock = {}
        locks = []
        for asset in self.assets:
            asset_str = str(asset)
            if asset_str not in map_asset_lock:
                map_asset_lock[asset_str] = multiprocessing.Lock()
            locks.append(map_asset_lock[asset_str])

        # pack key arguments to be used as inputs to map function
        list_args = []
        for asset, lock in zip(self.assets, locks):
            list_args.append([asset, lock])

        def _run(asset_lock):
            asset, lock = asset_lock
            # `with` guarantees the lock is released even if the run raises
            with lock:
                result = self._run_on_asset(asset)
            return result

        self.results = parallel_map(_run, list_args, processes=processes)
    else:
        self.results = list(map(self._run_on_asset, self.assets))
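# Usage sketch (illustrative, not part of the original module): assuming
# `executor` is an instance of a concrete Executor subclass constructed
# with a list of assets, the kwargs above are exercised like so:
#
#     executor.run()                               # serial execution
#     executor.run(parallelize=True)               # parallel, default pool size
#     executor.run(parallelize=True, processes=4)  # parallel, at most 4 processes
#     results = executor.results                   # one result per asset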
def run_executors_in_parallel(executor_class,
                              assets,
                              fifo_mode=True,
                              delete_workdir=True,
                              parallelize=True,
                              logger=None,
                              result_store=None,
                              optional_dict=None,
                              optional_dict2=None,
                              ):
    """
    Run multiple Executors in parallel.
    :return: an (executors, results) tuple, with one single-asset executor
    and one result per input asset.
    """

    # construct an executor object just to call _assert_assets() only
    executor_class(assets,
                   logger,
                   fifo_mode=fifo_mode,
                   delete_workdir=True,
                   result_store=result_store,
                   optional_dict=optional_dict,
                   optional_dict2=optional_dict2)

    # create locks for unique assets (uniqueness is identified by
    # str(asset)), so that two runs on the same asset never overlap
    map_asset_lock = {}
    locks = []
    for asset in assets:
        asset_str = str(asset)
        if asset_str not in map_asset_lock:
            map_asset_lock[asset_str] = multiprocessing.Lock()
        locks.append(map_asset_lock[asset_str])

    # pack key arguments to be used as inputs to map function
    list_args = []
    for asset, lock in zip(assets, locks):
        list_args.append(
            [executor_class, asset, fifo_mode, delete_workdir,
             result_store, optional_dict, optional_dict2, lock])

    def run_executor(args):
        executor_class, asset, fifo_mode, delete_workdir, \
            result_store, optional_dict, optional_dict2, lock = args
        # `with` guarantees the lock is released even if the run raises
        with lock:
            executor = executor_class([asset], None, fifo_mode, delete_workdir,
                                      result_store, optional_dict, optional_dict2)
            executor.run()
        return executor

    # run
    if parallelize:
        executors = parallel_map(run_executor, list_args, processes=None)
    else:
        executors = list(map(run_executor, list_args))

    # aggregate results
    results = [executor.results[0] for executor in executors]

    return executors, results
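# Usage sketch (illustrative; `MyExecutor` and `my_assets` are hypothetical
# stand-ins for a concrete Executor subclass and a list of Asset objects):
#
#     executors, results = run_executors_in_parallel(
#         MyExecutor,
#         my_assets,
#         fifo_mode=True,
#         delete_workdir=True,
#         parallelize=True,
#     )
#     # executors[i] ran on my_assets[i] alone; results[i] is its result.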