def _jobs_to_queue(self, jobs): queue = mp.JoinableQueue() for idx, job in enumerate(jobs): queue.put((idx, job)) for _ in range(self.workers): queue.put(None) return queue
def __init__(self, branches, domain, n_iters, executables, research_path, update_config, update_domain, n_updates):
    """Initialize the job-generation state for a research run.

    Parameters
    ----------
    branches : int or list
        Number of branches, or an explicit list of branch configs.
    domain : Domain
        Parameter domain to draw configs from (must expose ``size`` and
        ``set_update``) — presumably the project's Domain class; confirm.
    n_iters : int or None
        Iterations per experiment.
    executables : any
        Executable units to run for each config.
    research_path : str
        Path where research artifacts are stored.
    update_config : dict or None
        Config for the update function; when ``update_config['cache'] > 0``
        the function is wrapped with an LRU cache of that size.
    update_domain : dict or None
        kwargs forwarded to ``domain.set_update`` when not None.
    n_updates : int
        Number of domain updates.
    """
    self.branches = branches
    self.n_branches = branches if isinstance(branches, int) else len(branches)
    # NOTE(review): the original assigned self.domain twice; the redundant
    # second assignment was removed — behavior is unchanged.
    self.domain = domain
    self.n_iters = n_iters
    self.executables = executables
    self.research_path = research_path
    if update_config is not None and update_config['cache'] > 0:
        # Memoize the update function so repeated calls with identical
        # arguments are served from the cache.
        update_config['function'] = lru_cache(maxsize=update_config['cache'])(update_config['function'])
    self.update_config = update_config
    self.update_domain = update_domain
    self.n_updates = n_updates
    self.update_idx = 0
    self._domain_size = self.domain.size
    if self.update_domain is not None:
        self.domain.set_update(**self.update_domain)
    self.generator = self._generate_config(self.domain)
    self._queue = mp.JoinableQueue()
    self.generated_jobs = 0
def start(self):
    """Create the task and result queues, then launch the consumer processes."""
    mp.freeze_support()
    self.tasks = mp.JoinableQueue()
    self.results = mp.Queue()
    self.consumers = []
    for _ in range(self.getNConsumers()):
        self.consumers.append(Consumer(self.tasks, self.results))
    for consumer in self.consumers:
        consumer.start()
def __init__(self, measureFlowchart, numberProc=1):
    """Object for parallel processing and preprocessing of image frames.

    Parameters
    ----------
    measureFlowchart : object
        Flowchart describing the measurement pipeline.
    numberProc : int
        Number of worker processes to create (default 1).
    """
    self.measureFlowchart = measureFlowchart
    # Bounded input queue (maxsize=1) applies back-pressure on the producer.
    self.input_queue = multiprocessing.JoinableQueue(1)
    self.output_queue = multiprocessing.Queue()
    self.numberProc = numberProc
    self.processes = []
    for _ in range(self.numberProc):
        self.processes.append(ProcessQueue(self.input_queue, self.output_queue))
def __init__(self, env_id, make_env, reward_predictor, num_workers, max_timesteps_per_episode, seed):
    """Spawn a pool of actor processes sharing a task and a result queue."""
    self.num_workers = num_workers
    self.predictor = reward_predictor
    self.tasks_q = multiprocess.JoinableQueue()
    self.results_q = multiprocess.Queue()
    self.actors = []
    for worker_idx in range(self.num_workers):
        # Give each actor a uniquely seeded env
        actor_seed = seed * 1000 + worker_idx
        self.actors.append(Actor(self.tasks_q, self.results_q, env_id, make_env, actor_seed, max_timesteps_per_episode))
    for actor in self.actors:
        actor.start()
    # we will start by running 20,000 / 1000 = 20 episodes for the first iteration TODO OLD
    self.average_timesteps_in_episode = 1000
def run(self, jobs_queue, bar=False):
    """ Run distributor and workers.

    Parameters
    ----------
    jobs_queue : DynamicQueue
        queue of tasks to distribute between workers
    bar : bool or callable
        progress-bar factory; ``True`` selects tqdm, ``False`` a no-op bar
    """
    self.jobs_queue = jobs_queue
    # A bool selects a default bar implementation; a callable is used as-is.
    if isinstance(bar, bool):
        bar = tqdm if bar else _DummyBar()
    self.logger.info('Distributor [id:{}] is preparing workers'.format(
        os.getpid()))
    # self.workers is either a worker count (int) or a list of per-worker configs.
    if isinstance(self.workers, int):
        workers = [
            self.worker_class(devices=self.devices[i],
                              worker_name=i,
                              timeout=self.timeout,
                              trials=self.trials,
                              logger=self.logger) for i in range(self.workers)
        ]
    else:
        workers = [
            self.worker_class(devices=self.devices[i],
                              worker_name=i,
                              timeout=self.timeout,
                              trials=self.trials,
                              logger=self.logger,
                              worker_config=worker_config)
            for i, worker_config in enumerate(self.workers)
        ]
    try:
        self.logger.info('Create queue of jobs')
        self.results = mp.JoinableQueue()
    except Exception as exception:  #pylint:disable=broad-except
        self.logger.error(exception)
    else:
        if len(workers) > 1:
            msg = 'Run {} workers'
        else:
            msg = 'Run {} worker'
        self.logger.info(msg.format(len(workers)))
        # Each worker runs in its own process, pulling jobs and pushing signals.
        for worker in workers:
            try:
                mp.Process(target=worker,
                           args=(self.jobs_queue, self.results)).start()
            except Exception as exception:  #pylint:disable=broad-except
                self.logger.error(exception)
        previous_domain_jobs = 0
        n_updates = 0
        # job index -> number of finished iterations (used for progress).
        finished_iterations = dict()
        with bar(total=None) as progress:
            while True:
                # Prime the queue with one job per worker plus one extra.
                n_jobs = self.jobs_queue.next_jobs(len(workers) + 1)
                jobs_in_queue = n_jobs
                finished_jobs = 0
                rest_of_generator = 0
                while finished_jobs != jobs_in_queue:
                    progress.set_description('Domain updated: ' +
                                             str(n_updates))
                    # Re-estimate the total on every signal: the domain size
                    # may change after updates.
                    estimated_size = self.jobs_queue.total
                    if estimated_size is not None:
                        total = rest_of_generator + previous_domain_jobs + estimated_size
                        if self.n_iters is not None:
                            total *= self.n_iters
                        progress.total = total
                    # Blocks until some worker reports progress or completion.
                    signal = self.results.get()
                    if self.n_iters is not None:
                        finished_iterations[signal.job] = signal.iteration
                    if signal.done:
                        finished_jobs += 1
                        finished_iterations[signal.job] = self.n_iters
                        each = self.jobs_queue.domain.update_each
                        # Integer update_each: update the domain every `each`
                        # finished jobs.
                        if isinstance(each, int) and finished_jobs % each == 0:
                            was_updated = self.jobs_queue.update()
                            if was_updated:
                                rest_of_generator = jobs_in_queue
                            n_updates += was_updated
                        # Keep the queue topped up while the generator has jobs.
                        if n_jobs > 0:
                            n_jobs = self.jobs_queue.next_jobs(1)
                            jobs_in_queue += n_jobs
                    if self.n_iters is not None:
                        progress.n = sum(finished_iterations.values())
                    else:
                        # NOTE(review): signal.done is presumably truthy/1 on
                        # completion, so this counts finished jobs — confirm.
                        progress.n += signal.done
                    progress.refresh()
                if self.jobs_queue.domain.update_each == 'last':
                    # 'last' mode: update the domain only after the whole
                    # current generation has finished; stop when the update
                    # produces nothing new.
                    was_updated = self.jobs_queue.update()
                    n_updates += 1
                    if not was_updated:
                        break
                else:
                    self.jobs_queue.stop_workers(len(workers))
                    self.jobs_queue.join()
                    break
                previous_domain_jobs += finished_jobs
    self.logger.info('All workers have finished the work')
    logging.shutdown()
def __call__(self, queue, results):
    """ Run worker.

    Parameters
    ----------
    queue : multiprocessing.Queue
        queue of jobs for worker
    results : multiprocessing.Queue
        queue for feedback
    """
    _devices = [item['device'] for item in self.devices]
    self.logger.info('Start {} [id:{}] (devices: {})'.format(
        self.worker_name, os.getpid(), _devices))
    try:
        job = queue.get()
    except Exception as exception:  #pylint:disable=broad-except
        self.logger.error(exception)
    else:
        # None is the shutdown sentinel placed by the distributor.
        while job is not None:
            try:
                finished = False
                self.logger.info(self.worker_name +
                                 ' is creating process for Job ' +
                                 str(job[0]))
                # Retry the job up to self.trials times in fresh subprocesses.
                for trial in range(self.trials):
                    one_job_queue = mp.JoinableQueue()
                    one_job_queue.put(job)
                    feedback_queue = mp.JoinableQueue()
                    # Shared heartbeat timestamp updated by the task process;
                    # used below for timeout detection.
                    last_update_time = mp.Value('d', time.time())
                    task = mp.Process(target=self._run_task,
                                      args=(one_job_queue, feedback_queue,
                                            trial, last_update_time))
                    task.start()
                    # First message from the task is its PID.
                    pid = feedback_queue.get()
                    final_signal = Signal(worker=self.worker_name,
                                          job=job[0],
                                          iteration=0,
                                          n_iters=job[1].n_iters,
                                          trial=trial,
                                          done=False,
                                          exception=None)
                    while True:
                        try:
                            signal = feedback_queue.get(timeout=1)
                        except EmptyException:
                            signal = None
                        if signal is None:
                            # No feedback for a second: check the heartbeat
                            # against the timeout (timeout is in minutes).
                            execution_time = (time.time() -
                                              last_update_time.value) / 60
                            if self.timeout is not None and execution_time > self.timeout:
                                p = psutil.Process(pid)
                                p.terminate()
                                message = f'Job {job[0]} [{pid}] failed in {self.worker_name} because of timeout'
                                self.logger.info(message)
                                final_signal.exception = TimeoutError(message)
                                results.put(copy(final_signal))
                                break
                        if signal is not None and signal.done:
                            finished = True
                            final_signal = signal
                            break
                        if signal is not None:
                            # Intermediate progress signal: forward a copy to
                            # the distributor.
                            final_signal = signal
                            results.put(copy(final_signal))
                    if finished:
                        break
            except Exception as exception:  #pylint:disable=broad-except
                self.logger.error(exception)
                final_signal.exception = exception
                results.put(copy(final_signal))
            if final_signal.done:
                results.put(copy(final_signal))
            else:
                # All trials exhausted without success: report a terminal
                # failure so the distributor can count the job as finished.
                final_signal.exception = RuntimeError(
                    'Job {} [{}] failed {} times in {}'.format(
                        job[0], pid, self.trials, self.worker_name))
                final_signal.done = True
                results.put(copy(final_signal))
            queue.task_done()
            job = queue.get()
        # Acknowledge the sentinel so queue.join() in the distributor returns.
        queue.task_done()
def run(self, jobs, dirname, n_jobs, n_iters, logfile=None, errorfile=None, progress_bar=False, *args, **kwargs):
    """ Run distributor and workers.

    Parameters
    ----------
    jobs : iterable
        jobs to distribute between workers
    dirname : str
        directory for log files
    n_jobs : int
        total number of jobs
    n_iters : int or None
        iterations per job (used to size the progress bar)
    logfile : str (default: 'research.log')
    errorfile : str (default: 'errors.log')
    progress_bar : bool

    args, kwargs will be used in worker
    """
    self.logfile = logfile or 'research.log'
    self.errorfile = errorfile or 'errors.log'
    self.logfile = os.path.join(dirname, self.logfile)
    self.errorfile = os.path.join(dirname, self.errorfile)
    # Workers inherit the log locations through their kwargs.
    kwargs['logfile'] = self.logfile
    kwargs['errorfile'] = self.errorfile
    self.log_info('Distributor [id:{}] is preparing workers'.format(
        os.getpid()), filename=self.logfile)
    # self.workers is either a worker count (int) or a list of per-worker configs.
    if isinstance(self.workers, int):
        workers = [
            self.worker_class(gpu=self._get_worker_gpu(self.workers, i),
                              worker_name=i,
                              timeout=self.timeout,
                              trials=self.trials,
                              *args, **kwargs) for i in range(self.workers)
        ]
    else:
        workers = [
            self.worker_class(gpu=self._get_worker_gpu(len(self.workers), i),
                              worker_name=i,
                              config=config,
                              timeout=self.timeout,
                              trials=self.trials,
                              *args, **kwargs)
            for i, config in enumerate(self.workers)
        ]
    try:
        self.log_info('Create queue of jobs', filename=self.logfile)
        self.queue = self._jobs_to_queue(jobs)
        self.results = mp.JoinableQueue()
    except Exception as exception:  #pylint:disable=broad-except
        logging.error(exception, exc_info=True)
    else:
        if len(workers) > 1:
            msg = 'Run {} workers'
        else:
            msg = 'Run {} worker'
        self.log_info(msg.format(len(workers)), filename=self.logfile)
        for worker in workers:
            # Share the distributor's logging helpers with each worker.
            worker.log_info = self.log_info
            worker.log_error = self.log_error
            try:
                mp.Process(target=worker,
                           args=(self.queue, self.results)).start()
            except Exception as exception:  #pylint:disable=broad-except
                logging.error(exception, exc_info=True)
        self.answers = [0 for _ in range(n_jobs)]
        self.finished_jobs = []
        if progress_bar:
            if n_iters is not None:
                print("Distributor has {} jobs with {} iterations. Totally: {}"
                      .format(n_jobs, n_iters, n_jobs * n_iters))
                with tqdm(total=n_jobs * n_iters) as progress:
                    # Poll overall position until every job reports done.
                    while True:
                        position = self._get_position()
                        progress.n = position
                        progress.refresh()
                        if len(self.finished_jobs) == n_jobs:
                            break
            else:
                print("Distributor has {} jobs".format(n_jobs))
                with tqdm(total=n_jobs) as progress:
                    # Count finished jobs as completion signals arrive.
                    while True:
                        answer = self.results.get()
                        if answer.done:
                            self.finished_jobs.append(answer.job)
                        position = len(self.finished_jobs)
                        progress.n = position
                        progress.refresh()
                        if len(self.finished_jobs) == n_jobs:
                            break
        else:
            # Without a progress bar, just block until the job queue drains.
            self.queue.join()
        self.log_info('All workers have finished the work',
                      filename=self.logfile)
    logging.shutdown()
def __call__(self, queue, results):
    """ Run worker.

    Parameters
    ----------
    queue : multiprocessing.Queue
        queue of jobs for worker
    results : multiprocessing.Queue
        queue for feedback
    """
    _gpu = 'default' if len(self.gpu) == 0 else self.gpu
    self.log_info('Start {} [id:{}] (gpu: {})'.format(
        self.name, os.getpid(), _gpu), filename=self.logfile)
    # Restrict visible GPUs for this worker process and its children.
    if len(self.gpu) > 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
            [str(gpu) for gpu in self.gpu])
    try:
        job = queue.get()
    except Exception as exception:  #pylint:disable=broad-except
        self.log_error(exception, filename=self.errorfile)
    else:
        # None is the shutdown sentinel placed by the distributor.
        while job is not None:
            try:
                finished = False
                self.log_info(self.name + ' is creating process for Job ' +
                              str(job[0]), filename=self.logfile)
                # Retry the job up to self.trials times in fresh subprocesses.
                for trial in range(self.trials):
                    sub_queue = mp.JoinableQueue()
                    sub_queue.put(job)
                    feedback_queue = mp.JoinableQueue()
                    worker = mp.Process(target=self._run_job,
                                        args=(sub_queue, feedback_queue,
                                              self.name, trial))
                    worker.start()
                    # First message from the subprocess is its PID.
                    pid = feedback_queue.get()
                    # Seconds of consecutive silence from the subprocess.
                    silence = 0
                    default_signal = Signal(self.name, job[0], 0,
                                            job[1].n_iters, trial, False, None)
                    while True:
                        try:
                            signal = feedback_queue.get(timeout=1)
                        except Empty:
                            signal = None
                            silence += 1
                        # Timeout is in minutes; silence counts seconds.
                        if signal is None and silence / 60 > self.timeout:
                            p = psutil.Process(pid)
                            p.terminate()
                            message = 'Job {} [{}] failed in {}'.format(
                                job[0], pid, self.name)
                            self.log_info(message, filename=self.logfile)
                            default_signal.exception = TimeoutError(message)
                            results.put(default_signal)
                            break
                        elif signal is not None and signal.done:
                            finished = True
                            default_signal = signal
                            break
                        elif signal is not None:
                            # Intermediate progress: forward it and reset the
                            # silence counter.
                            default_signal = signal
                            results.put(default_signal)
                            silence = 0
                    if finished:
                        break
            except Exception as exception:  #pylint:disable=broad-except
                self.log_error(exception, filename=self.errorfile)
                default_signal.exception = exception
                results.put(default_signal)
            if default_signal.done:
                results.put(default_signal)
            else:
                # All trials exhausted without success.
                default_signal.exception = RuntimeError(
                    'Job {} [{}] failed {} times in {}'.format(
                        job[0], pid, self.trials, self.name))
                results.put(default_signal)
            queue.task_done()
            job = queue.get()
        # Acknowledge the sentinel so queue.join() in the distributor returns.
        queue.task_done()
def fetchseq(ids, species, write=False, output_name='', delim='\t', id_type='brute', server=None, source="SQL",
             database="bioseqdb", database_path=None, host='localhost', driver='psycopg2', version='1.0',
             user='******', passwd='', email='', batch_size=50, output_type="fasta", verbose=1, n_threads=1,
             n_subthreads=1, add_length=(0, 0), indent=0):
    """Fetch sequences for *ids* in parallel from a BioSQL server or a file source.

    Parameters
    ----------
    ids : generator, list or path-like
        Sequence records to fetch; a path-like is read line by line.
    species : str
        Species name passed through to the fetch workers.
    write : bool
        When True, write results with SeqIO instead of returning them.
    source : str
        'SQL'-like sources use BioSQL; 'fasta'/'2bit'/'twobit' use files.
    verbose : int
        Verbosity level for the project's indent-aware print.

    Returns
    -------
    None when *write* is True; otherwise ``(output_list, missing_items_list)``.
    """
    if isgenerator(ids):
        if verbose > 1:
            print('Received generator!', indent=indent)
    elif isinstance(ids, list):
        if verbose > 1:
            print('Received list!', indent=indent)
    else:
        if verbose > 1:
            print('Reading ID File... ', indent=indent)
        # BUGFIX: the file must be opened for reading; the original 'w' mode
        # truncated the ID file before any line could be read.
        with ids.open('r') as in_handle:
            id_prelist = [line.strip() for line in in_handle]  # list of each line in the file
            print('Done!', indent=indent)
        ids = [id_item for id_item in filter(None, id_prelist) if id_item]
        if not id_prelist or id_prelist is None:
            if verbose:
                print('id_prelist is empty!', indent=indent)
            return 'None'
    # Every record must carry exactly 12 fields (BED-like layout below).
    for id_item in ids:
        assert len(id_item) == 12, (
            "Item {0} in id_list has {1} items, not 12!\n"
            "Format should be: "
            "chr, (start,end), id, score, strand, thickStart, thickEnd, rgb, blockcount,"
            " blockspans, blockstarts, query_span"
            "!").format(
                " ".join((" ".join(item) if not isinstance(item, str) else item
                          for item in id_item)), len(id_item))
    if verbose > 1:
        print('Readied ids!', indent=indent)
    id_list = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    if 'sql' in source.lower():
        if server is None:
            try:
                if verbose > 1:
                    print('No server received, opening server...', indent=indent)
                server = BioSeqDatabase.open_database(driver=driver,
                                                      user=user,
                                                      passwd=passwd,
                                                      host=host,
                                                      database=database)
                if verbose > 1:
                    print('Done!', indent=indent)
            except Exception as err:
                if verbose > 1:
                    print('Failed to open server!', indent=indent)
                    print(str(type(err)), err, sep=' ', indent=indent)
                raise
        else:
            if verbose > 1:
                print('Received server handle:', indent=indent)
                print(server, indent=indent)
            if verbose > 2:
                print('Please note the sub_databases of server:\n\t',
                      [str(i) for i in server.keys()], indent=indent)
    elif source.lower() in ['fasta', '2bit', 'twobit']:
        print('Search type: ', source, indent=indent)
    else:
        raise SearchEngineNotImplementedError(
            'Search using source {} has not yet been implemented!'.format(
                source))
    if verbose > 1:
        print('Creating FecSeq Processes...', indent=indent)
    # One FetchSeqMP process per requested thread, all sharing the queues.
    fs_instances = [
        FetchSeqMP(id_queue=id_list, seq_out_queue=results, delim=delim,
                   id_type=id_type, server=server, species=species,
                   source=source, database=database,
                   database_path=database_path, host=host, driver=driver,
                   version=version, user=user, passwd=passwd, email=email,
                   output_type=output_type, batch_size=batch_size,
                   verbose=verbose, n_subthreads=n_subthreads,
                   add_length=add_length, indent=indent + 1)
        for _ in range(n_threads)
    ]
    if verbose > 1:
        print('Done! Starting processes...', indent=indent)
    for fs in fs_instances:
        fs.start()
    if verbose > 1:
        print('Done!', indent=indent)
        print('Assigning FetchSeq records to queue... ', indent=indent)
    # Remember the requested order so results can be re-sorted at the end.
    id_order = []
    for i, id_rec in enumerate(ids):
        try:
            id_order.append("{0}:{1}-{2}".format(id_rec[0], id_rec[1][0], id_rec[1][1]))
        except IndexError:
            id_order.append("{0}".format(id_rec[0]))
        try:
            id_list.put(FetchSeq(id_rec=id_rec))
        except AssertionError as err:
            print(i, type(err), err, sep=' ')
            break
    # One None sentinel per worker process.
    for _ in fs_instances:
        id_list.put(None)
    if verbose > 1:
        print('Done!', indent=indent)
    output_dict = dict()
    missing_items_list = list()
    if verbose > 1:
        print('Getting sequences from processes... ', indent=indent)
    n_jobs = len(ids)
    while n_jobs:
        seq, missing = results.get()
        output_dict[seq[0]] = seq[1]
        missing_items_list.append(missing)
        n_jobs -= 1
    if verbose > 1:
        print('Done! Finished fetching sequences!', indent=indent)
        print('Closing processes!', indent=indent)
    for fs in fs_instances:
        if fs.is_alive():
            fs.join()
    output_list = [output_dict[i] for i in id_order if i in output_dict]
    if write:
        SeqIO.write(output_list, output_name, output_type)
        return
    else:
        if missing_items_list == [None]:
            missing_items_list = None
        return output_list, missing_items_list
def biosql_get_record(id_list, sub_db_name, passwd='', id_type='accession', driver="psycopg2", indent=0,
                      user="******", host="localhost", database="bioseqdb", num_proc=2, verbose=True, server=None):
    """Fetch BioSQL records for *id_list* using a pool of GetSeqMP processes.

    :param sub_db_name: BioSQL sub-database name to look up in.
    :param passwd: database password.
    :param id_list: identifier or list of identifiers to fetch.
    :param id_type: identifier type (e.g. 'accession').
    :param driver: DB-API driver name.
    :param indent: indentation level for the project's indent-aware print.
    :param user: database user.
    :param host: database host.
    :param database: database name.
    :param num_proc: number of worker processes.
    :param verbose: verbosity flag/level.
    :param server: optional pre-opened BioSQL server handle.
    :return: ``(seqdict, itemsnotfound)`` — fetched records keyed by id, and
        the ids that produced no record.

    if __name__ == '__main__':
        biosql_get_record(sub_db_name='MyoLuc2.0', passwd='',
                          id_list=['NW_005871148', 'NW_005871300', 'NW_005871148'], id_type='accession',
                          driver="psycopg2", user="******",
                          host="localhost", database="bioseqdb", verbose=True)
    """
    idents = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    # num = multiprocessing.cpu_count() * 2
    if verbose > 2:
        print('\tStarting biosql_get_record_mp', indent=indent)
    # Accept a single identifier as well as a list.
    id_list = id_list if isinstance(id_list, list) else [id_list]
    num_jobs = len(id_list)
    seqdict = dict()
    getseqs = [
        GetSeqMP(idents, results, database=database, host=host, driver=driver,
                 user=user, passwd=passwd, sub_db_name=sub_db_name,
                 verbose=verbose, server=server) for _ in range(num_proc)
    ]
    for gs in getseqs:
        gs.start()
    for item in id_list:
        idents.put(
            BioSeqLookupCascade(id_type=id_type, identifier=item,
                                verbose=verbose, indent=indent))
    # One None sentinel per worker process so each can shut down.
    for i in range(num_proc):
        idents.put(None)
    # Collect exactly one (id, record) result per submitted identifier.
    while num_jobs:
        temp = results.get()
        print(temp, indent=indent)
        temp[1].name = temp[0]
        seqdict[temp[0]] = temp[1]
        num_jobs -= 1
    if verbose:
        print('Done with biosql_get_record_mp!', indent=indent)
        print('Closing processes!', indent=indent)
    for gs in getseqs:
        if gs.is_alive():
            gs.join()
    itemsnotfound = [i for i in id_list if i not in seqdict.keys()]
    return seqdict, itemsnotfound