def runTrainTopTestAll(aType):
    logging.info('Running ' + aType)
    if aType == 'ALL':
        aND = np.asarray(allTop)
    elif aType == 'INTER':
        aND = np.asarray(interTop)
    elif aType == 'LOCUS':
        aND = np.asarray(locusTop)
    elif aType == 'BLIA':
        aND = np.asarray(bliaTop)
    elif aType == 'BugLocator':
        aND = np.asarray(bugLTop)
    elif aType == 'BRTracer':
        aND = np.asarray(brtTop)
    elif aType == 'AMALGAM':
        aND = np.asarray(amalTop)
    elif aType == 'BLUiR':
        aND = np.asarray(bluirTop)
    elif aType == 'NALL':
        aND = np.asarray(allNTop)
    from sklearn.model_selection import KFold
    if aType == 'AMALGAM':
        logging.info(len(aND))
        kf = KFold(n_splits=len(aND), shuffle=True)
    else:
        kf = KFold(n_splits=10, shuffle=True)  # Define the split - into 10 folds
    if aType != 'BLUiR':
        iTer = 0
        tuples = []
        for train_index, test_index in kf.split(aND):
            X_train, X_test = aND[train_index], aND[test_index]
            aTuple = X_train, X_test, aType, iTer, simiFiles, '_TESTALL'
            tuples.append(aTuple)
            iTer += 1
        pool = Pool(core, maxtasksperchild=1)
        pool.map_async(corePredict, [link for link in tuples], chunksize=1)
        pool.close()
        pool.join()
    else:
        aTuple = aND, None, aType, 0, simiFiles, '_TESTALL'
        corePredict(aTuple, True)
def execute_all(runs, mode="a"):
    pool = Pool(processes=multiprocessing.cpu_count())
    result = pool.map_async(execute, runs, chunksize=1)
    start_time = time.time()
    while not result.ready():
        completed = len(runs) - result._number_left
        if completed > 0:
            rate = 60.0 * completed / (time.time() - start_time)
            percent = 100.0 * completed / len(runs)
            print(
                "%04d/%04d %4.1f%% %s %4d/min ETA: %3.1f\r"
                % (
                    completed,
                    len(runs),
                    percent,
                    progressbar(percent / 100),
                    rate,
                    1.0 / rate * result._number_left,
                ),
                end=" ",
            )
            sys.stdout.flush()
        time.sleep(0.2)
    print("\n")
    pool.close()
def parallel_variability_analysis(tmodel, kind='reactions',
                                  proc_num=BEST_THREAD_RATIO):
    """
    WIP.

    :param tmodel:
    :param kind:
    :param proc_num:
    :return:
    """
    raise (NotImplementedError)

    objective = tmodel.objective

    if kind == Reaction or kind.lower() in ['reaction', 'reactions']:
        these_vars = tmodel.reactions
    else:
        these_vars = tmodel.get_variables_of_type(kind)

    func = partial(_variability_analysis_element, tmodel)
    pool = Pool(processes=proc_num)
    async_result = pool.map_async(func, these_vars)
    pool.close()
    pool.join()

    # aggregated_result = pd.DataFrame(async_result.get(),
    #                                  columns=['minimize', 'maximize'])

    tmodel.objective = objective
    return async_result
def run():
    nkeys = int(input('Enter number of keys: '))
    keys = input('Enter the keys: ').strip().split()
    assert len(keys) == nkeys, 'len(keys) != nkeys'
    p = list(map(float, input('Enter success probabilities (p): ').split()))
    assert len(p) == nkeys, 'len(p) != nkeys'
    q = list(map(float, input('Enter failure probabilities (q): ').split()))
    assert len(q) == (nkeys + 1), 'len(q) != nkeys + 1'

    key_array = [Key(val, p1) for val, p1 in zip(keys, p)]
    all_permutations = it.permutations(key_array)

    pool = Pool(processes=8)
    start_time = time.time()
    result = pool.map_async(solve, [OBSTSolver(keys=perm, q=q[:]) for perm in all_permutations])
    pool.close()
    pool.join()
    end_time = time.time()

    results = result.get(1)
    print("\n*** OBST Calculated ***")
    min_root, min_cost = min(results, key=lambda x: x[1])
    print("Minimum Cost: {}\n".format(min_cost))
    min_root.inorder()
    print("")
    print("Time taken: {} ms.".format((end_time - start_time) * 1000))
def full_languages_modules_run(langfilter: LIST_STR, modfilter: LIST_STR,
                               brows: LIST_STR) -> None:
    """Run the selected set of modules and locales, logging results, and
    saving a screenshot in case of failure. By default, will run all of them."""
    output = '\n"START: {0}", {1}\n'.format(
        get_time(), ','.join(modfilter).upper())  # Header row.
    pool = Pool(cpu_count() * 2)
    try:
        asy = pool.map_async(
            do_locale,
            [(x, LANGS, MOD_STEM, CMOD_STEM, modfilter, b, BROWSERS[b], ARGS)
             for x in langfilter for b in brows])
        while True:
            if asy.ready():
                break
            time.sleep(1)  # Pool map waiting blocks KeyboardInterrupts, so don't do that.
    except KeyboardInterrupt:
        pool.terminate()
        raise
    results = asy.get()
    output += '\n'.join(results)  # Each locale's row.
    output += '\n"FINISH: {0}"\n\n'.format(get_time())  # Footer row.
    try:
        with open(RESULTS_FILE, mode='a', encoding='UTF-8') as log:
            log.write(output)
    except PermissionError:
        print('In future, be sure to not leave the log file open.')
        print('That tends to lock it, so now it cannot be written to.')
        print('\n\nNow, you have to try to read raw CSV from a console:\n\n')
        print(output)
def main():
    url_list = get_url_list()
    pool = Pool(processes=config.N_WORKERS)
    result = pool.map_async(grab_data, url_list)
    write_md(result.get())
def _execute_import(self, files_to_scan: List[str]) -> (Set[str], Set[str]):
    new_or_changed_files = set()
    undetected_files = set()

    self._files_count = self._count_files_to_scan()
    self.emit_event_main_thread("scan-progress", 0.05)

    self._progress = 0
    pool = Pool()
    while True:
        try:
            job = pool.map_async(import_file, itertools.islice(files_to_scan, CHUNK_SIZE))
        except StopIteration as e:
            log.warning("importer", e, "_execute_import raised a stop iteration.")
            break

        self._wait_for_job_to_complete(job)
        import_result = job.get()

        undetected_files.update({file for file in import_result if isinstance(file, str)})
        media_files = {file for file in import_result if isinstance(file, MediaFile)}
        new_or_changed_files.update((file.path for file in media_files))

        self._progress += CHUNK_SIZE

        if len(media_files) != 0:
            self._database_importer.insert_many(media_files)

        if self._progress >= self._files_count:
            break

    pool.close()
    return new_or_changed_files, undetected_files
def async_multiprocess(my_list):
    pool = Pool()
    start_time = time()
    result = pool.map_async(add_one, my_list)
    pool.close()
    print(async_multiprocess.__name__,
          '\nRequired time: {:.6f}\n'.format(time() - start_time),
          result.get()[-5:], end='\n\n')
class MultiprocessEvaluator(Evaluator[S]):
    def __init__(self, processes: int = None):
        super().__init__()
        self.pool = Pool(processes)

    def evaluate(self, solution_list: List[S], problem: Problem) -> List[S]:
        # return self.pool.map(functools.partial(evaluate_solution, problem=problem), solution_list)
        result = self.pool.map_async(
            functools.partial(evaluate_solution, problem=problem), solution_list)
        return result.get(timeout=1000)
def run(self):
    cases = self.get_test_case()
    # Create a process pool, one worker per test case
    pool = Pool(processes=len(cases))
    result.append(pool.map_async(self.init_driver, cases.values()))
    pool.close()
    pool.join()
    while not q.empty():
        comm.Template.set_middle(q.get())
def poolHandle(zip, nid):
    if DEBUG_LEVEL == 0:
        p = Pool(80)
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj != None:
                p.apply_async(handleSub, args=(fobj, nid))
        p.close()
        p.join()
    elif DEBUG_LEVEL == 1:
        p = billiard.Pool()
        _finalizers.append(Finalize(p, p.terminate))
        try:
            p.map_async(handleSub, [(getSubFobj(zip, sub), nid) for sub in zip.namelist()])
            p.close()
            p.join()
        finally:
            p.terminate()
    else:
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj != None:
                handleSub(fobj, nid)
    zip.close()
def download_chunks(self, max_workers=5):
    print('Will now download chunks.')
    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    executor = Pool(max_workers)
    signal.signal(signal.SIGINT, original_sigint_handler)
    try:
        r = executor.map_async(self.get, self.urls)
        result = list(r.get(43200))
        DownloadResultProcessor.process_and_print(result)
    except KeyboardInterrupt:
        executor.terminate()
    else:
        executor.close()
        executor.join()
def sprinter(self):
    """
    Called when parallelize is True.

    This function will generate the file names in a directory tree by adding
    directories to a Queue and continuously exploring directories in the Queue
    until the Queue is emptied. Significantly faster than the crawler method for
    larger directory trees.
    """
    self._printer('Multiprocess Walk')
    # Loop through directories in case there is more than one (1)
    for directory in self.directory:
        self._get_root_files(directory)  # Add files within root directory if filepaths is empty
        # Acquire the list of first-level directories
        first_level_dirs = next(os.walk(directory))[1]
        for path in first_level_dirs:
            self.unsearched.put((directory, path))

    self._printer('Pool Processing STARTED')
    pool = Pool(self.pool_size)
    pool.map_async(self.parallel_worker, range(self.pool_size))
    pool.close()
    self.unsearched.join()
    self._printer('Pool Processing ENDED')
    return self.filepaths
def main() -> None:
    """Main function"""
    if not args.token:
        logging.error(
            "GitHub Token is missing. Please pass your GitHub token key as --token=xxxxxx"
        )
        sys.exit(1)
        return
    url_list = get_url_list()
    pool = Pool(processes=args.workers)
    result = pool.map_async(grab_data, url_list)
    write_md(result.get())
def execute_all(runs):
    print('starting runs')
    nproc = multiprocessing.cpu_count()
    pool = Pool(processes=nproc)
    result = pool.map_async(execute, runs, chunksize=1)
    start_time = time.time()
    while not result.ready():
        completed = len(runs) - result._number_left
        if completed > 0:
            rate = 60.0 * completed / (time.time() - start_time)
            percent = 100.0 * completed / len(runs)
            print('%04d/%04d %4.1f%% %s %4d/min ETA: %3.1f\n'
                  % (completed, len(runs), percent,
                     progressbar(percent / 100), rate,
                     1.0 / rate * result._number_left))
            sys.stdout.flush()
        time.sleep(1)
    print('\ncompleted', len(runs) - result._number_left)
    pool.close()
def image_urls(self):
    """
    Iterates over json obj, gets image links
    Creates pool of workers, creates new workers
    """
    json_obj = self.jsonify()

    for post in json_obj["posts"]:
        if "ext" in post:
            self.total_count.value += 1

    try:
        self.thread_name = self.args.name
    except (KeyError, NameError):
        self.thread_name = json_obj["posts"][0]["sub"].replace(" ", "_")
    else:
        self.thread_name = str(json_obj["posts"][0]["no"])

    for post in json_obj["posts"]:
        if "ext" in post:
            filename = post["tim"] + post["ext"]
            image_url = "https://8ch.net/{board}/src/{file}".format(board=self.board, file=filename)
            self.downloads.append((image_url, filename))
            self.download_image(image_url, filename)
            with self.counter.get_lock():
                self.counter.value += 1
                update_progress(self.counter.value, self.total_count.value)

    pool = Pool(self.workers)
    pool_map = pool.map_async(self.download_image, self.downloads)
    try:
        pool_map.get(0xFFFF)
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
        pool.join()
    else:
        pool.close()
        pool.join()
def image_urls(self):
    """
    Iterates over json obj, gets image links
    Creates pool of workers, creates new workers
    """
    json_obj = self.jsonify()

    for post in json_obj['posts']:
        if 'ext' in post:
            self.total_count.value += 1

    self.thread_name = json_obj['posts'][0]['semantic_url']

    for post in json_obj['posts']:
        if 'ext' in post:
            filename = str(post['tim']) + post['ext']
            image_url = 'https://i.4cdn.org/{board}/{file}'.format(
                board=self.board, file=filename)
            self.filename.append(filename)
            self.downloads.append(image_url)
            self.download_image(image_url, filename)
            with self.counter.get_lock():
                self.counter.value += 1
                update_progress(self.counter.value, self.total_count.value)

    manager = Manager()
    pool_data = manager.list(self.downloads)
    partial_data = partial(self.download_image, pool_data)
    pool = Pool(self.workers)
    pool_map = pool.map_async(partial_data, self.filename)
    try:
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
        pool.join()
def parallel_variability_analysis(tmodel, kind='reactions',
                                  proc_num=BEST_THREAD_RATIO):
    """
    WIP.

    :param tmodel:
    :param kind:
    :param proc_num:
    :return:
    """
    objective = tmodel.objective

    if kind == Reaction or kind.lower() in ['reaction', 'reactions']:
        these_vars = tmodel.reactions
    else:
        these_vars = tmodel.get_variables_of_type(kind)

    aggregate_results = {}
    for what in ("min", "max"):
        if proc_num > 1:
            # chunk_size = len(these_vars) // proc_num
            func = partial(_variability_analysis_element, tmodel, sense=what)
            pool = Pool(processes=proc_num, initializer=mute)
            async_result = pool.map_async(func, these_vars)
            pool.close()
            pool.join()
        else:
            print("Multiple threads need to be specified")
            raise (NotImplementedError)

        aggregate_results[what] = async_result.get()

    dataframe_results = pd.DataFrame(aggregate_results)
    return dataframe_results
def map_async(self, func, iterable, chunksize=None, callback=None):
    return Pool.map_async(self, LogExceptions(func), iterable, chunksize, callback)
class LLTInf(object): """Obtains a decision tree that classifies the given labeled traces. traces : a Traces object The set of labeled traces to use as training set depth : integer Maximum depth to be reached optimize_impurity : function. Optional, defaults to optimize_inf_gain A function that obtains the best parameters for a test in a given node according to some impurity measure. The should have the following prototype: optimize_impurity(traces, primitive, rho, disp) : (primitive, impurity) where traces is a Traces object, primitive is a depth 2 STL formula, rho is a list with the robustness degree of each trace up until this node in the tree and disp is a boolean that switches output display. The impurity returned should be so that the best impurity is the minimum one. stop_condition : list of functions. Optional, defaults to [perfect_stop] list of stopping conditions. Each stopping condition is a function from a dictionary to boolean. The dictionary contains all the information passed recursively during the construction of the decision tree (see arguments of lltinf_). disp : a boolean Switches displaying of debuggin output Returns a DTree object. TODO: Fix comments """ def __init__( self, depth=1, primitive_factory=llt.make_llt_primitives, optimize_impurity=impurity.ext_inf_gain, stop_condition=None, redo_after_failed=1, optimizer_args=None, times=None, fallback_impurity=impurity.inf_gain, log=False, ): self.depth = depth self.primitive_factory = primitive_factory self.optimize_impurity = optimize_impurity self.fallback_impurity = fallback_impurity if stop_condition is None: self.stop_condition = [perfect_stop] else: self.stop_condition = stop_condition if optimizer_args is None: optimizer_args = {} self.optimizer_args = optimizer_args self.times = times self.interpolate = times is not None if self.interpolate and len(self.times) > 1: self.tinter = self.times[1] - self.times[0] else: self.tinter = None self.tree = None self.redo_after_failed = redo_after_failed self._partial_add = 0 self.log = log if "workers" not in self.optimizer_args: self.pool = Pool(initializer=_pool_initializer) def pool_map(func, iterable): try: return self.pool.map_async(func, iterable).get(timeout=120) except KeyboardInterrupt: self.pool.terminate() self.pool.join() raise KeyboardInterrupt() self.pool_map = pool_map self.optimizer_args["workers"] = self.pool_map def __del__(self): if hasattr(self, "pool"): self.pool.terminate() self.pool.join() def __exit__(self): if hasattr(self, "pool"): self.pool.terminate() self.pool.join() def fit(self, traces, disp=False): np.seterr(all="ignore") self.tree = self._lltinf(traces, None, self.depth, disp=disp) return self def fit_partial(self, traces, disp=False): if self.tree is None: return self.fit(traces, disp=disp) else: preds = self.predict(traces.signals) failed = set() for i in range(len(preds)): leaf = self.tree.add_signal(traces.signals[i], traces.labels[i], self.interpolate, self.tinter) if preds[i] != traces.labels[i]: failed.add(leaf) # logger.debug("Failed set: {}".format(failed)) self._partial_add += len(failed) if self._partial_add // self.redo_after_failed > 0: # logger.debug("Redoing tree") self._partial_add = 0 return self.fit(self.tree.traces, disp=disp) else: for leaf in failed: # TODO don't redo whole node, only leaf tree = self._lltinf( leaf.traces, leaf.robustness, self.depth - leaf.level(), disp=disp, ) old_tree = leaf.copy() leaf.set_tree(tree) # FIXME only for perfect_stop preds = self.predict(traces.signals) if not np.array_equal(preds, 
traces.labels): self._partial_add = 0 return self.fit(self.tree.traces, disp=disp) return self def predict(self, signals): if self.tree is not None: return np.array([ self.tree.classify(s, self.interpolate, self.tinter) for s in signals ]) else: raise ValueError("Model not fit") def get_formula(self): if self.tree is not None: return self.tree.get_formula() else: raise ValueError("Model not fit") def _debug(self, *args): if self.log: logger.debug(*args) def _lltinf(self, traces, rho, depth, disp=False, override_impurity=None): """Recursive call for the decision tree construction. See lltinf for information on similar arguments. rho : list of numerics List of robustness values for each trace up until the current node depth : integer Maximum depth to be reached. Decrements for each recursive call """ # Stopping condition if any( [stop(self, traces, rho, depth) for stop in self.stop_condition]): return None # Find primitive using impurity measure self._debug( f"Creating primitives at depth {depth} over {len(traces)} traces") primitives = self.primitive_factory(traces.signals, traces.labels) if override_impurity is None: impurity = self.optimize_impurity else: impurity = override_impurity self._debug( f"Finding best primitive at depth {depth} over {len(traces)} traces" ) primitive, impurity = _find_best_primitive( traces, primitives, rho, impurity, disp, self.optimizer_args, times=self.times, interpolate=self.interpolate, tinter=self.tinter, ) if disp: print("Best: {} ({})".format(primitive, impurity)) self._debug(f"Best primitive found: {primitive} (imp: {impurity})") # Classify using best primitive and split into groups prim_rho = [ primitive.score(model) for model in traces.models(self.interpolate, self.tinter) ] if rho is None: rho = [np.inf for i in traces.labels] tree = DTree(primitive, traces, rho) def split(prim_rho): sat, unsat = [], [] for i, rho in enumerate(prim_rho): if rho >= 0: sat.append(i) else: unsat.append(i) return sat, unsat # [prim_rho, rho, signals, label] # sat_, unsat_ = split_groups( # list(zip(prim_rho, rho, *traces.as_list())), lambda x: x[0] >= 0 # ) sat_, unsat_ = split(prim_rho) self._debug(f"Split: {len(sat_)}/{len(unsat_)}") # pure_wrong = all([t[3] <= 0 for t in sat_]) or all([t[3] >= 0 for t in unsat_]) # pure_right = all([t[3] >= 0 for t in sat_]) or all([t[3] <= 0 for t in unsat_]) sat_right = len([i for i in sat_ if traces.labels[i] >= 0]) sat_wrong = len(sat_) - sat_right unsat_right = len([i for i in unsat_ if traces.labels[i] <= 0]) unsat_wrong = len(unsat_) - unsat_right # Switch sat and unsat if labels are wrong. 
No need to negate prim rho since # we use it in absolute value later if sat_right * unsat_right == 0 or (sat_wrong * unsat_wrong != 0 and sat_right < unsat_wrong): self._debug(f"Inverting primitive") sat_, unsat_ = unsat_, sat_ tree.primitive.negate() # No further classification possible if len(sat_) == 0 or len(unsat_) == 0: self._debug("No further classification possible") if override_impurity is None: self._debug("Attempting to classify using impurity fallback") return self._lltinf( traces, rho, depth, disp=disp, override_impurity=self.fallback_impurity, ) else: return None # Redo data structures sat_traces, unsat_traces = [ traces.subset(traces, idxs) for idxs in [sat_, unsat_] ] sat_rho, unsat_rho = [ np.amin( [np.abs([prim_rho[i] for i in idxs]), [rho[i] for i in idxs]], 0) for idxs in [sat_, unsat_] ] # sat, unsat = [ # (Traces(*group[2:]), np.amin([np.abs(group[0]), group[1]], 0)) # for group in [list(zip(*sat_)), list(zip(*unsat_))] # ] # Recursively build the tree tree.left = self._lltinf(sat_traces, sat_rho, depth - 1, disp=disp) tree.right = self._lltinf(unsat_traces, unsat_rho, depth - 1, disp=disp) return tree
class TqaCore(object): def __init__(self, ranker_opts, reader_opts, reuser_opts, num_workers=None, online=True): start = time.time() self.online = online if self.online: self.session = requests.Session() self.adapter = HTTPAdapter(pool_connections=5, pool_maxsize=5, max_retries=5) self.session.mount('http://', self.adapter) self.session.mount('https://', self.adapter) self.header = { 'Content-Type': 'application/x-www-form-urlencode', 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' } for key, value in enumerate(self.header): capability_key = 'phantomjs.page.customHeaders.{}'.format(key) webdriver.DesiredCapabilities.PHANTOMJS[capability_key] = value self.browser = webdriver.PhantomJS( executable_path='./phantomjs', service_log_path=os.path.devnull) logger.info('Initializing reuser...') bin_path = reuser_opts.get('embedded_corpus_bin_path') threshold = reuser_opts.get('threshold') self.matcher = FastTextMatcher(bin_path, threshold) logger.info('Initializing document rankers...') tfidf_model_paths = ranker_opts.get('tfidf_model_paths') self.tfidf_rank_k = ranker_opts.get('tfidf_rank_k', DEFAULTS['tfidf_rank_k']) self.rankers = {} for tfidf_model_path in tfidf_model_paths: db_table = os.path.basename(tfidf_model_path).split("_")[0] self.rankers[db_table] = TfidfRanker(tfidf_model_path) logger.info('Initializing document reader...') model_path = reader_opts.get('reader_model_path') self.reader = load_model(model_path, new_args=None) embedded_corpus_path = reader_opts.get('embedded_corpus_path', None) if embedded_corpus_path: logger.info('Expanding dictionary...') words = get_embedded_words(embedded_corpus_path) added_words, _ = self.reader.expand_dictionary(words, chars=None) self.reader.load_embeddings(added_words, embedded_corpus_path) use_cuda = reader_opts.get('use_cuda', None) and torch.cuda.is_available() if use_cuda: self.reader.cuda() self.top_k_answers = reader_opts.get('top_k_answers', DEFAULTS['top_k_answers']) logger.info('Initializing tokenizer and retriever...') annotators = set() if self.reader.args.use_pos: annotators.add('pos') if self.reader.args.use_lemma: annotators.add('lemma') if self.reader.args.use_ner: annotators.add('ner') tokenizer_opts = { 'language': self.reader.args.language, 'annotators': annotators, # 'timeout': 10000, } self.num_workers = num_workers self.pool = Pool(num_workers, initializer=pool_init, initargs=(tokenizer_opts, )) end = time.time() logger.info('Server start elapse: {min}min {sec}sec'.format( min=int(end - start) // 60, sec=int(end - start) % 60)) def reuse(self, questions): ids = [] titles = [] descriptions = [] for question in questions: ids.append(question['id']) titles.append(question['title']) descriptions.append(question['desc']) # q_tokens包含标题和描述 q_tokens = self.pool.map_async( tokenize, [titles[i] + ' ' + descriptions[i] for i in range(0, len(titles))]) q_tokens = q_tokens.get() score, index = self.matcher.match(q_tokens, titles, descriptions) return ids[index], score def answer(self, question_title, question_all): start_time = time.time() logger.info('Processing question: %s...' % question_title) logger.info('Retrieving top %d documents...' 
% self.tfidf_rank_k) results = None if self.online: try: results = self.online_rank(question_title=question_title, question_all=question_all) except: results = None if not results: with ThreadPool(self.num_workers) as threads: _rank = partial(self.rank, question_title=question_title, question_all=question_all) results = threads.map(_rank, self.rankers.keys()) logger.info('Answer elapse = %d' % (time.time() - start_time)) return results def answerOne(self, question_title, question_all, d_tokens, d_ids): logger.info("Tokenizing question...") q_tokens = self.pool.map_async(tokenize, [question_title]) q_tokens = q_tokens.get() examples = [] for i in range(len(d_tokens)): examples.append({ 'id': d_ids[i], 'qtext': q_tokens[0].words(), 'qlemma': q_tokens[0].lemma(), 'dtext': d_tokens[i].words(), 'dlemma': d_tokens[i].lemma(), 'dpos': d_tokens[i].pos(), 'dner': d_tokens[i].ner(), }) logger.info("Batchify...") examples_in_batch = utils.batchify([ utils.vectorize(example, self.reader, single_answer=False) for example in examples ]) start, end, score = self.reader.predict(examples_in_batch, self.top_k_answers) # 从start, end生成答案 results = [] for i in range(len(start)): print(d_ids[i]) for j in range(len(start[i])): answer = d_tokens[i].slice(start[i][j], end[i][j] + 1).untokenize() text = d_tokens[i].answer_sentence(start[i][j], end[i][j] + 1).untokenize() results.append({ 'score': score[i][j].item(), 'answer': answer, 'text': text, 'id': d_ids[i] }) return results def online_rank(self, question_title, question_all): question_title = re.sub( '[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()]+', "", question_title) url = 'https://so.csdn.net/so/search/s.do?q=' + quote( question_title) + '&t=blog' self.browser.get(url) html = self.browser.page_source soup = BeautifulSoup(html, 'html.parser') link = soup.find_all('dl', {"class": "search-list J_search"})[0] \ .find('dd', {'class': 'search-link'}) \ .find('a')['href'] logger.info('First Blog: %s' % link) resp = self.session.request('GET', link, params=None) soup = BeautifulSoup(resp.content, "html.parser") title = soup.find('h1', {'class': 'title-article'}).get_text() content = soup.find(id="article_content").get_text() content = re.sub(r"\t+|\n+|\r+", "", content) # 去除非空格的空白符 content = re.sub(r"\s{2,}", " ", content) # print(content) logger.info("Tokenizing document...") d_tokens = self.pool.map_async(tokenize, [content]) d_tokens = d_tokens.get() return self.answerOne(question_title, question_all, d_tokens, ['blog@' + link + '@' + title]) def rank(self, db_table, question_title, question_all): logger.info("Finding closest documents...") result = [ self.rankers[db_table].closest_docs(query=question_all, k=self.tfidf_rank_k) ] documents_ids, documents_scores = zip(*result) documents_ids = documents_ids[0] documents_scores = documents_scores[0] print(db_table, documents_ids, documents_scores) if len(documents_ids) == 0: return None logger.info("Tokenizing document...") _build_tokens = partial(build_tokens, db_table=db_table) d_rank_k_tokens = self.pool.map_async(_build_tokens, documents_ids) d_rank_k_tokens = d_rank_k_tokens.get() return self.answerOne(question_title, question_all, d_rank_k_tokens, documents_ids)
    }, {
        'type': 'ineq',
        'fun': con_b3
    })

    res = minimize(turbine_opts, x0, method='SLSQP', bounds=bnds, constraints=cons)
    print(-res['fun'], num)
    return res


p = Pool(processes=1)
try:
    results = p.map_async(optimize, variables).get(9999999)
except KeyboardInterrupt:
    p.terminate()
    sys.exit('KeyboardInterrupt')
p.close()
p.join()

hours = int((time.time() - start_time) / 3600)
minutes = int((time.time() - start_time - hours * 3600) / 60)
seconds = time.time() - start_time - hours * 3600 - minutes * 60
print('Turbine calculations done in', hours, 'h', minutes, 'm', seconds, 's')

results.sort(key=lambda x: -x['fun'])
def compress_cso(fname_in, fname_out, level): fin, fout = open_input_output(fname_in, fname_out) fin.seek(0, os.SEEK_END) total_bytes = fin.tell() fin.seek(0) header_size, block_size, ver, align = 0x18, 0x800, 1, DEFAULT_ALIGN magic = ZISO_MAGIC if USE_LZ4 else CISO_MAGIC # We have to use alignment on any CSO files which > 2GB, for MSB bit of index as the plain indicator # If we don't then the index can be larger than 2GB, which its plain indicator was improperly set if total_bytes >= 2 ** 31 and align == 0: align = 1 header = generate_cso_header(magic, header_size, total_bytes, block_size, ver, align) fout.write(header) total_block = total_bytes // block_size index_buf = [0 for i in range(total_block + 1)] fout.write(b"\x00\x00\x00\x00" * len(index_buf)) show_comp_info(fname_in, fname_out, total_bytes, block_size, align, level) write_pos = fout.tell() percent_period = total_block // 100 percent_cnt = 0 if MP: pool = Pool() else: pool = None block = 0 while block < total_block: if MP: percent_cnt += min(total_block - block, MP_NR) else: percent_cnt += 1 if percent_cnt >= percent_period and percent_period != 0: percent_cnt = 0 if block == 0: print("compress %3d%% avarage rate %3d%%\r" % ( block // percent_period , 0), file=sys.stderr, end="") else: print("compress %3d%% avarage rate %3d%%\r" % ( block // percent_period , 100 * write_pos // (block * 0x800)), file=sys.stderr, end="") if MP: iso_data = [(fin.read(block_size), level) for i in range(min(total_block - block, MP_NR))] cso_data_all = pool.map_async(zip_compress_mp, iso_data).get(9999999) for i in range(len(cso_data_all)): write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align cso_data = cso_data_all[i] if 100 * len(cso_data) // len(iso_data[i][0]) >= min(COMPRESS_THREHOLD, 100): cso_data = iso_data[i][0] index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes") sys.exit(1) fout.write(cso_data) write_pos += len(cso_data) block += 1 else: iso_data = fin.read(block_size) try: cso_data = zip_compress(iso_data, level) except zlib.error as e: print("%d block: %s" % (block, e)) sys.exit(-1) write_pos = set_align(fout, write_pos, align) index_buf[block] = write_pos >> align if 100 * len(cso_data) // len(iso_data) >= COMPRESS_THREHOLD: cso_data = iso_data index_buf[block] |= 0x80000000 # Mark as plain elif index_buf[block] & 0x80000000: print("Align error, you have to increase align by 1 or CFW won't be able to read offset above 2 ** 31 bytes") sys.exit(1) fout.write(cso_data) write_pos += len(cso_data) block += 1 # Last position (total size) index_buf[block] = write_pos >> align # Update index block fout.seek(len(header)) for i in index_buf: idx = pack('I', i) # assert(len(idx) == 4) fout.write(idx) print("ciso compress completed , total size = %8d bytes , rate %d%%" % (write_pos, (write_pos * 100 // total_bytes))) fin.close() fout.close()
for trial in xrange(trials):
    for budget in budgets:
        run = get_run(budget, algo)
        runs.append(run)

random.shuffle(runs)


def execute(run):
    # print run
    output = subprocess.check_output(run['arguments'])
    f = open(run['filename'], 'a')
    f.write(output)
    f.close()


pool = Pool(processes=mp.cpu_count())
result = pool.map_async(execute, runs, chunksize=1)
start_time = time.time()
while not result.ready():
    completed = len(runs) - result._number_left
    if completed > 0:
        eta = (time.time() - start_time) / completed * result._number_left / 60
        print '%6d left, ETA: %4.1f minutes' % (result._number_left, eta)
        sys.stdout.flush()
    time.sleep(1)
pool.close()

# nice parallel -j 6 -- "command1" "command2"
def raster2pyramid( input_file, output_dir, options ): """ Creates a tile pyramid out of an input raster dataset. """ pyramid_type = options["pyramid_type"] scale_method = options["scale_method"] output_format = options["output_format"] resampling = options["resampling"] zoom = options["zoom"] bounds = options["bounds"] overwrite = options["overwrite"] # Prepare process parameters minzoom, maxzoom = _get_zoom(zoom, input_file, pyramid_type) process_file = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tilify.py" ) with rasterio.open(input_file, "r") as input_raster: output_bands = input_raster.count input_dtype = input_raster.dtypes[0] output_dtype = input_raster.dtypes[0] nodataval = input_raster.nodatavals[0] if not nodataval: nodataval = 0 if output_format == "PNG": if output_bands > 3: output_bands = 3 output_dtype = 'uint8' scales_minmax = () if scale_method == "dtype_scale": for index in range(1, output_bands+1): scales_minmax += (DTYPE_RANGES[input_dtype], ) elif scale_method == "minmax_scale": for index in range(1, output_bands+1): band = input_raster.read(index) scales_minmax += ((band.min(), band.max()), ) elif scale_method == "crop": for index in range(1, output_bands+1): scales_minmax += ((0, 255), ) if input_dtype == "uint8": scale_method = None scales_minmax = () for index in range(1, output_bands+1): scales_minmax += ((None, None), ) # Create configuration config = {} config.update( process_file=process_file, output={ "path": output_dir, "format": output_format, "type": pyramid_type, "bands": output_bands, "dtype": output_dtype }, scale_method=scale_method, scales_minmax=scales_minmax, input_files={"raster": input_file}, config_dir=os.getcwd(), process_minzoom=minzoom, process_maxzoom=maxzoom, nodataval=nodataval, resampling=resampling, bounds=bounds, pixelbuffer=5, baselevel={"zoom": maxzoom, "resampling": resampling} ) LOGGER.info("preparing process ...") try: mapchete = Mapchete( MapcheteConfig( config, zoom=zoom, bounds=bounds ) ) except PyCompileError as error: print error return except: raise # Prepare output directory and logging if not os.path.exists(output_dir): os.makedirs(output_dir) logging.config.dictConfig(get_log_config(mapchete)) for zoom in reversed(range(minzoom, maxzoom+1)): # Determine work tiles and run work_tiles = mapchete.get_work_tiles(zoom) func = partial(_worker, mapchete=mapchete, overwrite=overwrite ) pool = Pool() try: pool.map_async(func, work_tiles) pool.close() except KeyboardInterrupt: LOGGER.info( "Caught KeyboardInterrupt, terminating workers" ) pool.terminate() break except: raise finally: pool.close() pool.join()
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy, _Resultable): _Processors_Pool: Pool = None _Processors_List: List[Union[ApplyResult, AsyncResult]] = None def __init__(self, pool_size: int): super().__init__(pool_size=pool_size) def initialization(self, queue_tasks: Optional[Union[_BaseQueueTask, _BaseList]] = None, features: Optional[Union[_BaseFeatureAdapterFactory, _BaseList]] = None, *args, **kwargs) -> None: super(ProcessPoolStrategy, self).initialization(queue_tasks=queue_tasks, features=features, *args, **kwargs) # Activate multiprocessing.managers.BaseManager server activate_manager_server() # Initialize and build the Processes Pool. __pool_initializer: Callable = kwargs.get("pool_initializer", None) __pool_initargs: IterableType = kwargs.get("pool_initargs", None) self._Processors_Pool = Pool(processes=self.pool_size, initializer=__pool_initializer, initargs=__pool_initargs) def apply(self, tasks_size: int, function: Callable, args: Tuple = (), kwargs: Dict = {}) -> None: self.reset_result() __process_running_result = None try: __process_running_result = [ self._Processors_Pool.apply(func=function, args=args, kwds=kwargs) for _ in range(tasks_size) ] __exception = None __process_run_successful = True except Exception as e: __exception = e __process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=None) def async_apply(self, tasks_size: int, function: Callable, args: Tuple = (), kwargs: Dict = {}, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() self._Processors_List = [ self._Processors_Pool.apply_async(func=function, args=args, kwds=kwargs, callback=callback, error_callback=error_callback) for _ in range(tasks_size) ] for process in self._Processors_List: _process_running_result = None _process_run_successful = None _exception = None try: _process_running_result = process.get() _process_run_successful = process.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception) def apply_with_iter(self, functions_iter: List[Callable], args_iter: List[Tuple] = None, kwargs_iter: List[Dict] = None) -> None: self.reset_result() __process_running_result = None if args_iter is None: args_iter = [() for _ in functions_iter] if kwargs_iter is None: kwargs_iter = [{} for _ in functions_iter] try: __process_running_result = [ self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs) for _func, _args, _kwargs in zip(functions_iter, args_iter, kwargs_iter) ] __exception = None __process_run_successful = True except Exception as e: __exception = e __process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=None) def async_apply_with_iter( self, functions_iter: List[Callable], args_iter: List[Tuple] = None, kwargs_iter: List[Dict] = None, callback_iter: List[Callable] = None, error_callback_iter: List[Callable] = None) -> None: self.reset_result() if args_iter is None: args_iter = [() for _ in functions_iter] if kwargs_iter is None: kwargs_iter = [{} for _ in functions_iter] if callback_iter is None: callback_iter = [None for _ in functions_iter] if error_callback_iter is None: 
error_callback_iter = [None for _ in functions_iter] self._Processors_List = [ self._Processors_Pool.apply_async(func=_func, args=_args, kwds=_kwargs, callback=_callback, error_callback=_error_callback) for _func, _args, _kwargs, _callback, _error_callback in zip( functions_iter, args_iter, kwargs_iter, callback_iter, error_callback_iter) ] for process in self._Processors_List: _process_running_result = None _process_run_successful = None _exception = None try: _process_running_result = process.get() _process_run_successful = process.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception) def map(self, function: Callable, args_iter: IterableType = (), chunksize: int = None) -> None: self.reset_result() _process_running_result = None try: _process_running_result = self._Processors_Pool.map( func=function, iterable=args_iter, chunksize=chunksize) _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def async_map(self, function: Callable, args_iter: IterableType = (), chunksize: int = None, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() _process_running_result = None _exception = None _map_result = self._Processors_Pool.map_async( func=function, iterable=args_iter, chunksize=chunksize, callback=callback, error_callback=error_callback) try: _process_running_result = _map_result.get() _process_run_successful = _map_result.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (), chunksize: int = None) -> None: self.reset_result() _process_running_result = None try: _process_running_result = self._Processors_Pool.starmap( func=function, iterable=args_iter, chunksize=chunksize) _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def async_map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (), chunksize: int = None, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() _map_result = self._Processors_Pool.starmap_async( func=function, iterable=args_iter, chunksize=chunksize, callback=callback, error_callback=error_callback) _process_running_result = _map_result.get() _process_run_successful = _map_result.successful() # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=None) def imap(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None: self.reset_result() _process_running_result = None try: 
imap_running_result = self._Processors_Pool.imap( func=function, iterable=args_iter, chunksize=chunksize) _process_running_result = [ result for result in imap_running_result ] _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def imap_unordered(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None: self.reset_result() _process_running_result = None try: imap_running_result = self._Processors_Pool.imap_unordered( func=function, iterable=args_iter, chunksize=chunksize) _process_running_result = [ result for result in imap_running_result ] _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def _result_saving(self, successful: bool, result: List, exception: Exception) -> None: _process_result = { "successful": successful, "result": result, "exception": exception } self._Processors_Running_Result.append(_process_result) def close(self) -> None: self._Processors_Pool.close() self._Processors_Pool.join() def terminal(self) -> None: self._Processors_Pool.terminate() def get_result(self) -> List[_ProcessPoolResult]: return self.result() def _saving_process(self) -> List[_ProcessPoolResult]: _pool_results = [] for __result in self._Processors_Running_Result: _pool_result = _ProcessPoolResult() _pool_result.is_successful = __result["successful"] _pool_result.data = __result["result"] _pool_results.append(_pool_result) return _pool_results
def map_async(self, func, args=(), kwargs={}, callback=None):
    results = NativePool.map_async(
        self, MultiprocessingLogExceptions(func), args, kwargs, callback)
    self.results.extend(results)
    return results
# Finance       https://news.sina.com.cn/roll/#pageid=153&lid=2516&k=&num=50&page=1
# Technology    https://news.sina.com.cn/roll/#pageid=153&lid=2515&k=&num=50&page=1
# Military      https://news.sina.com.cn/roll/#pageid=153&lid=2514&k=&num=50&page=1
# Entertainment https://news.sina.com.cn/roll/#pageid=153&lid=2513&k=&num=50&page=1
# Lottery       http://sports.sina.com.cn/roll/#pageid=13&lid=581&k=&num=50&page=1

# ------------------ Edit the parameters below as needed ----------------
start_page = 1
end_page = 2
# home_path = 'test_data/'
home_path = 'train_data/'
# ------------------ Edit the parameters above as needed ----------------

params = [
    # pageid, lid, start_page (inclusive), end_page (exclusive), save_path
    ('153', '2513', start_page, end_page, home_path + '娱乐.csv'),
    ('153', '2514', start_page, end_page, home_path + '军事.csv'),
    ('153', '2515', start_page, end_page, home_path + '科技.csv'),
    ('153', '2516', start_page, end_page, home_path + '财经.csv'),
    ('153', '2517', start_page, end_page, home_path + '股市.csv'),
    ('13', '585', start_page, end_page, home_path + '赛车.csv'),
    ('13', '571', start_page, end_page, home_path + '篮球.csv'),
    ('13', '572', start_page, end_page, home_path + '足球.csv'),
    ('13', '583', start_page, end_page, home_path + '跑步.csv'),
    ('13', '581', start_page, end_page, home_path + '彩票.csv'),
]

print('program start...')
start_time = time.time()
pool = Pool(processes=multiprocessing.cpu_count() - 1)  # Start worker processes (they do not all run simultaneously)
pool.map_async(start_spider, params)
pool.close()
pool.join()
print('program run time:', time.time() - start_time, 'seconds')
import random
from multiprocessing.pool import Pool

import multiproc_defs as defs

if __name__ == '__main__':
    pool = Pool()
    to_factor = [random.randint(100000, 50000000) for i in range(20)]
    results = pool.map_async(defs.prime_factors, to_factor)
    while not results.ready():
        results.wait(timeout=0)
    for value, factors in zip(to_factor, results.get()):
        print("The factors of {} are {}".format(value, factors))
start_page = 0
end_page = 1

opts, args = getopt.getopt(sys.argv[1:], "hs:e:")
for cmd, arg in opts:
    if cmd in ("-s"):
        start_page = int(arg)
    if cmd in ("-e"):
        end_page = int(arg) + 1

print("start_page", start_page)
print("end_page", end_page)

write_page_file(start_page, end_page)

pic_list = []
for i in range(start_page, end_page):
    pic_list += get_pic_url_list(i)

print('URL list fetched, starting image download...')
start_time = time.time()
pool = Pool(10)
pool.map_async(download_pic, pic_list)
pool.close()
pool.join()
print(error_page)
print(f'Download done\n Elapsed: {time.time() - start_time} seconds')
def work(a):
    pass


if __name__ == '__main__':
    pool = Pool(3)

    for i in range(10):
        result = pool.apply(work, (i, ))
        print(result)
    print("apply all done")

    ###########################################
    results = []
    for i in range(10):
        result = pool.apply_async(work, (i, ))
        results.append(result)
    for result in results:
        print(result.get())
    print("apply all done")

    ###########################################
    results = pool.map(work, (i, ))
    print(results)

    ###########################################
    results = pool.map_async(work, (i, ))
    print(results.get())

    pool.close()
    pool.join()
        return self.__getitem__(item)

    def __len__(self):
        return len(self.img)


if __name__ == "__main__":
    from tqdm import trange
    from multiprocessing.pool import Pool

    data = SISTLine("/home/ziheng/indoorDist_new", None, "train")
    # os.makedirs("/home/ziheng/heatmaps")
    pool = Pool(20)
    cnt = 0

    def readnsave(i):
        batch = data[i]
        hm = batch["heatmap"].numpy()
        np.save(f"/home/ziheng/heatmaps/{i}.npy", hm)

    def juncsave(i):
        batch = data[i]
        hm = batch["heatmap"].numpy()
        junc = batch["heatmap"].numpy()
        np.save(f"/home/ziheng/heatmaps/{i}.npy", hm)

    readnsave.cnt = 0
    # for i in trange(len(data)):
    pool.map_async(readnsave, range(len(data)))
    pool.close()
    pool.join()
            bucket,
            latest_dir + filename,
            ExtraArgs={
                'ContentType': CT,
                'ACL': "public-read",
                'CacheControl': 'no-cache'
            })
    except Exception as error:
        core_fail('Upload Error ' + str(error))


##################
# Test Run Logic #
##################
check_env()
create_dir()
# Run through all the tags
pool = Pool(processes=3)
r = pool.map_async(container_test, tags, callback=update_globals)
r.wait()
report_render()
badge_render()
report_upload()
# Exit based on test results
if report_status == 'PASS':
    print('Tests Passed exiting 0')
    sys.exit(0)
elif report_status == 'FAIL':
    print('Tests Failed exiting 1')
    sys.exit(1)
def run_commands(commands, processes=None, timeout=None, meta=None, observer=None):
    pool = Pool(processes=processes)
    manager, queue, m = None, None, None
    manager = Manager()
    m = manager.Queue()
    if observer:
        queue = manager.Queue()
    if meta:
        commands = [(i, meta, command, timeout, queue, m)
                    for i, (command, meta) in enumerate(zip(commands, meta))]
    else:
        commands = [(i, meta, command, timeout, queue, m)
                    for i, command in enumerate(commands)]

    with temp_file() as f:
        filename = str(f)
        m_process = Process(target=monitor, args=(filename, m))
        m_process.daemon = True
        m_process.start()

        def clean_exit():
            status("Keyboard interrupt intercepted, shutting down")
            try:
                m_process.terminate()
                m_process.join()
            except Exception:
                status("Monitor process could not be shut down")
                print_exc()
            try:
                pool.terminate()
                pool.join()
            except Exception:
                status("Pool could not be shut down")
                print_exc()
            status("Shutting down potential orphan processes")
            active = set()
            with open(filename) as ref:
                for line in ref:
                    parts = line.split(" ")
                    if parts[0] == "ADD":
                        active.add(int(parts[1]))
                    elif parts[0] == "REM":
                        active.remove(int(parts[1]))
            for pid in active:
                try:
                    print("Killing", pid)
                    os.killpg(pid, signal.SIGTERM)  # send signal to the process group
                except OSError as e:
                    if e.errno != errno.ESRCH:
                        if e.errno == errno.EPERM:
                            os.waitpid(-pid, 0)
                        else:
                            raise e
                except Exception:
                    print_exc()
                    pass
            os.unlink(filename)
            status("Completely shut down")

        r = pool.map_async(worker, commands)
        atexit.register(clean_exit)
        if observer:
            observe(observer, queue, len(commands))
        r.wait()
        status("### DONE ##")
        m.put(Update.SENTINEL)
        m_process.join()