def run_executors_in_parallel(
        executor_class,
        assets,
        fifo_mode=True,
        delete_workdir=True,
        parallelize=True,
        logger=None,
        result_store=None,
        optional_dict=None,
):
    """
    Run one Executor per asset, in parallel when possible.

    For every asset an executor is built as
    ``executor_class([asset], None, fifo_mode, delete_workdir, result_store,
    optional_dict)`` and its ``run()`` method is called.

    :param executor_class: callable that builds an executor; the returned
        object must provide ``run()`` and a ``results`` sequence
    :param assets: iterable of assets; one executor is created per asset
    :param fifo_mode: forwarded unchanged to the executor constructor
    :param delete_workdir: forwarded unchanged to the executor constructor
    :param parallelize: if True, try ``pathos.pp_map.pp_map`` and fall back to
        a sequential map when it cannot be imported; if False, always run
        sequentially
    :param logger: optional logger that receives the fallback warning through
        ``logger.warning``; when falsy the warning is printed instead
    :param result_store: forwarded unchanged to the executor constructor
    :param optional_dict: forwarded unchanged to the executor constructor
    :return: tuple ``(executors, results)`` where ``results[i]`` is
        ``executors[i].results[0]``
    """

    def run_executor(args):
        # Takes one packed argument so it can be handed to map() / pp_map().
        (executor_class, asset, fifo_mode,
         delete_workdir, result_store, optional_dict) = args
        executor = executor_class([asset], None, fifo_mode, delete_workdir,
                                  result_store, optional_dict)
        executor.run()
        return executor

    # pack key arguments to be used as inputs to map function
    list_args = [
        [executor_class, asset, fifo_mode,
         delete_workdir, result_store, optional_dict]
        for asset in assets
    ]

    # map arguments to func
    if parallelize:
        try:
            from pathos.pp_map import pp_map
            executors = pp_map(run_executor, list_args)
        except ImportError:
            # fall back
            msg = "pathos.pp_map cannot be imported for parallel execution, " \
                  "fall back to sequential map()."
            if logger:
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning(msg)
            else:
                print('Warning: {}'.format(msg))
            # list(): on Python 3 map() is lazy and would be exhausted by the
            # aggregation below, leaving nothing to return to the caller.
            executors = list(map(run_executor, list_args))
    else:
        executors = list(map(run_executor, list_args))

    # aggregate results
    results = [executor.results[0] for executor in executors]

    return executors, results
def run_executors_in_parallel(executor_class,
                              assets,
                              fifo_mode=True,
                              delete_workdir=True,
                              parallelize=True,
                              logger=None,
                              result_store=None,
                              optional_dict=None,
                              optional_dict2=None,
                              ):
    """
    Run multiple Executors in parallel, one per asset.

    For every asset an executor is built as
    ``executor_class([asset], None, fifo_mode, delete_workdir, result_store,
    optional_dict, optional_dict2)`` and its ``run()`` method is called.

    :param executor_class: callable that builds an executor; the returned
        object must provide ``run()`` and a ``results`` sequence
    :param assets: iterable of assets; one executor is created per asset
    :param fifo_mode: forwarded unchanged to the executor constructor
    :param delete_workdir: forwarded unchanged to the executor constructor
    :param parallelize: if True, try ``pathos.pp_map.pp_map`` and fall back to
        a sequential map when it cannot be imported; if False, always run
        sequentially
    :param logger: optional logger that receives the fallback warning through
        ``logger.warning``; when falsy the warning is printed instead
    :param result_store: forwarded unchanged to the executor constructor
    :param optional_dict: forwarded unchanged to the executor constructor
    :param optional_dict2: forwarded unchanged to the executor constructor
    :return: tuple ``(executors, results)`` where ``results[i]`` is
        ``executors[i].results[0]``
    """

    def run_executor(args):
        # Takes one packed argument so it can be handed to map() / pp_map().
        (executor_class, asset, fifo_mode, delete_workdir,
         result_store, optional_dict, optional_dict2) = args
        executor = executor_class([asset], None, fifo_mode, delete_workdir,
                                  result_store, optional_dict, optional_dict2)
        executor.run()
        return executor

    # pack key arguments to be used as inputs to map function
    list_args = [
        [executor_class, asset, fifo_mode, delete_workdir,
         result_store, optional_dict, optional_dict2]
        for asset in assets
    ]

    # map arguments to func
    if parallelize:
        try:
            from pathos.pp_map import pp_map
            executors = pp_map(run_executor, list_args)
        except ImportError:
            # fall back
            msg = "pathos.pp_map cannot be imported for parallel execution, " \
                  "fall back to sequential map()."
            if logger:
                # Logger.warn is a deprecated alias of Logger.warning.
                logger.warning(msg)
            else:
                print('Warning: {}'.format(msg))
            # list(): on Python 3 map() is lazy and would be exhausted by the
            # aggregation below, leaving nothing to return to the caller.
            executors = list(map(run_executor, list_args))
    else:
        executors = list(map(run_executor, list_args))

    # aggregate results
    results = [executor.results[0] for executor in executors]

    return executors, results
def format_string_list(_data):
    """
    Normalize every string in ``_data`` and return the normalized strings.

    Each string is lower-cased, every run of non-alphanumeric characters is
    replaced by a single space, the text is tokenized with NLTK and English
    stopwords are dropped; the remaining tokens are re-joined with spaces.
    The per-string work is dispatched through ``pp_map`` (expected to be
    available in the enclosing module scope).

    The first ten inputs, two banner lines and the first ten outputs are
    printed as progress output.

    :param _data: sliceable sequence of strings to normalize
    :return: whatever ``pp_map`` returns for the normalized strings
    """

    def format_string(s):
        # Imports are kept local to the worker function, presumably so it is
        # self-contained when pp_map ships it to worker processes -- confirm
        # before hoisting them to module level.
        from nltk.tokenize import word_tokenize
        from nltk.corpus import stopwords
        import re

        # A set gives O(1) membership tests instead of scanning a list for
        # every token.
        _stopwords = frozenset(stopwords.words('english'))

        s = s.lower()  # case lowering
        # non-alphanumeric-character removal
        s = re.sub(r'[^0-9a-zA-Z]+', ' ', s)
        # stopword removal (stemming / lemmatization are currently disabled)
        _words_list = [w for w in word_tokenize(s) if w not in _stopwords]
        return ' '.join(_words_list)

    print(_data[:10])
    print('---------------stopword removal--------------')
    print('---------------stem--------------')
    _format = pp_map(format_string, _data)
    print(_format[:10])
    return _format
# Demo: apply sin2 to the same input serially and through three parallel map
# back ends (mpi4py, multiprocessing, parallelpython), printing each result.

# print the input to screen
x = np.arange(N * nodes, dtype=np.float64)
print("Input: %s\n" % x)

# run sin2 in series, then print to screen
print("Running serial python ...")
# list(): on Python 3 map() is lazy, and np.asarray() on a map object yields a
# 0-d object array wrapping the iterator rather than the computed values.
y = list(map(sin2, x))
print("Output: %s\n" % np.asarray(y))

# map sin2 to the workers, then print to screen
print("Running mpi4py on %d cores..." % nodes)
y = mpi_map(sin2, x, nnodes=nodes)
print("Output: %s\n" % np.asarray(y))

# map sin2 to the workers, then print to screen
print("Running multiprocessing on %d processors..." % nodes)
y = mp_map(sin2, x, nproc=nodes)
print("Output: %s\n" % np.asarray(y))

# map sin2 to the workers, then print to screen
print("Running parallelpython on %d cpus..." % nodes)
y = pp_map(sin2, x, ncpus=nodes, servers=('mycpu.mydomain.com',))
print("Output: %s\n" % np.asarray(y))

# EOF