def run_with_multiprocessing(nprocesses, ntasks, niterations):

    def task(n, name):
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    def worker(reporter, task, queue):
        register_reporter(reporter)
        while True:
            args = queue.get()
            if args is None:
                queue.task_done()
                break
            task(*args)
            queue.task_done()

    reporter = find_reporter()
    queue = multiprocessing.JoinableQueue()
    for i in range(nprocesses):
        p = multiprocessing.Process(target=worker, args=(reporter, task, queue))
        p.start()
    for i in range(ntasks):
        name = 'task {}'.format(i)
        n = niterations[i]
        queue.put((n, name))
    for i in range(nprocesses):
        queue.put(None)
    queue.join()
    flush()
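A minimal driver for the helper above, assuming the atpbar and standard-library imports the function relies on; the process count, task count, and iteration lengths are illustrative values, not taken from the original source.

import multiprocessing
import random
import time

from atpbar import atpbar, find_reporter, flush, register_reporter

if __name__ == '__main__':  # guard needed on spawn-based platforms (Windows, macOS)
    nprocesses = 4
    ntasks = 10
    niterations = [random.randint(5, 1000) for _ in range(ntasks)]
    run_with_multiprocessing(nprocesses, ntasks, niterations)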
def close(self):
    """close the drop box

    Returns
    -------
    None
    """

    # end workers
    if self.workers:
        for i in range(len(self.workers)):
            self.task_queue.put(None)
        self.task_queue.join()
        self.workers = []

    # end logging listener
    try:
        self.logging_queue.put(None)
    except (AssertionError, ValueError):
        # the queue is already closed
        # AssertionError: Python 3.7
        # ValueError: Python 3.8+
        pass
    self.loggingListener.join()

    self.task_queue.close()
    self.result_queue.close()
    self.logging_queue.close()

    if self.progressbar:
        atpbar.flush()
def process(n_threads, possibilities, doh_server):
    sections = len(possibilities) // n_threads
    starts = []
    ends = []
    start = 0
    end = sections
    result = {}
    threads = []

    # split the candidate list into equal-size slices, one per thread
    for i in range(n_threads):
        starts.append(start)
        ends.append(end)
        start += sections
        end += sections

    for i in range(len(starts)):
        t = threading.Thread(target=worker, args=(
            possibilities,
            starts[i],
            ends[i],
            i,
            result,
            doh_server,
        ))
        threads.append(t)
        t.start()

    for t in threads:
        t.join()

    flush()
    return result
def _updateExistingGenomes(self):
    workerQueue = mp.Queue()
    writerQueue = mp.JoinableQueue()
    reportlist = mp.Manager().list()
    shortcheckmlist = mp.Manager().list()
    tasklist = mp.Manager().list()

    for record in self.list_checkm_records:
        workerQueue.put(record)

    # one sentinel per worker process so each one stops cleanly
    for _ in range(self.cpus):
        workerQueue.put(None)

    try:
        workerProc = [
            mp.Process(target=self.worker_updateExistingGenomes,
                       args=(workerQueue, i, tasklist, reportlist, shortcheckmlist, writerQueue))
            for i in range(self.cpus)
        ]
        writeProc = mp.Process(target=self.__progress,
                               args=(len(self.list_checkm_records), writerQueue))

        writeProc.start()

        for p in workerProc:
            p.start()

        for p in workerProc:
            p.join()

        writerQueue.put(None)
        writeProc.join()

        # run the accumulated SQL commands in background threads
        taskProc = []
        for i, list_sql in enumerate(tasklist):
            subproc = threading.Thread(target=self.task_sql_command, args=(list_sql, i))
            subproc.start()
            taskProc.append(subproc)
        for tp in taskProc:
            tp.join()

        flush()
    except Exception:
        for p in workerProc:
            p.terminate()
        writeProc.terminate()

    self.logger.info('We write a report')
    for reportlist_item in reportlist:
        self.report_database_update.write(reportlist_item)

    result = []
    for it in shortcheckmlist:
        result.append(it)
    return result
def kraft_download_via_manifest(ctx, workdir=None, manifest=None, equality=None,
                                version=None, use_git=False, skip_verify=False):
    """Download the component described by a manifest in a background thread
    and wait for it to finish."""

    threads = list()

    def kraft_download_component_thread(localdir=None, manifest=None,
                                        equality=ManifestVersionEquality.EQ,
                                        version=None, use_git=False,
                                        skip_verify=False, override_existing=False):
        with ctx:
            kraft_download_component(
                localdir=localdir,
                manifest=manifest,
                equality=equality,
                version=version,
                use_git=use_git,
                skip_verify=skip_verify,
                override_existing=override_existing
            )

    if workdir is None:
        localdir = manifest.localdir
    elif manifest.type == ComponentType.CORE:
        localdir = os.path.join(workdir, manifest.type.workdir)
    else:
        localdir = os.path.join(workdir, manifest.type.workdir, manifest.name)

    thread = ErrorPropagatingThread(
        target=kraft_download_component_thread,
        kwargs={
            'localdir': localdir,
            'manifest': manifest,
            'equality': equality,
            'version': version,
            'use_git': use_git,
            'skip_verify': skip_verify
        }
    )
    threads.append((manifest, thread))
    thread.start()

    for manifest, thread in threads:
        try:
            thread.join()
        except Exception as e:
            logger.error("Error pulling manifest: %s" % e)
            if ctx.obj.verbose:
                import traceback
                logger.error(traceback.format_exc())

    if sys.stdout.isatty():
        flush()
def _checkPathorRemoveRecord(self):
    workerQueue = mp.Queue()
    writerQueue = mp.Queue()
    reportlist = mp.Manager().list()
    tasklist = mp.Manager().list()

    for record in self.dict_existing_records:
        workerQueue.put(record)

    # one sentinel per worker process so each one stops cleanly
    for _ in range(self.cpus):
        workerQueue.put(None)

    # try:
    workerProc = [
        mp.Process(target=self.worker_checkPathorRemoveRecord,
                   args=(workerQueue, i, tasklist, reportlist, writerQueue))
        for i in range(self.cpus)
    ]
    writeProc = mp.Process(target=self.__progress,
                           args=(len(self.dict_existing_records), writerQueue))

    writeProc.start()

    for p in workerProc:
        p.start()

    for p in workerProc:
        p.join()

    writerQueue.put(None)
    writeProc.join()

    # run the accumulated SQL commands in background threads
    taskProc = []
    for i, list_sql in enumerate(tasklist):
        subproc = threading.Thread(target=self.task_sql_command, args=(list_sql, i))
        subproc.start()
        taskProc.append(subproc)
    for tp in taskProc:
        tp.join()

    flush()

    # except:
    #     for p in workerProc:
    #         p.terminate()
    #     writeProc.terminate()

    self.logger.info('We write a report')
    for reportlist_item in reportlist:
        self.report_database_update.write(reportlist_item)
def run_with_threading(nthreads=3, niterations=[5, 5, 5]):

    def task(n, name):
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    threads = []
    for i in range(nthreads):
        name = 'thread {}'.format(i)
        n = niterations[i]
        t = threading.Thread(target=task, args=(n, name))
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    flush()
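A minimal way to call the threading helper above, assuming the atpbar and standard-library imports it relies on; the thread count and iteration list are illustrative.

import threading
import time

from atpbar import atpbar, flush

# niterations needs at least nthreads entries, one per thread
run_with_threading(nthreads=4, niterations=[40, 30, 20, 10])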
def close(self):
    """close the drop box

    Returns
    -------
    None
    """

    # end workers
    if self.workers:
        for i in range(len(self.workers)):
            self.task_queue.put(None)
        self.task_queue.join()
        self.workers = []

    atpbar.flush()
def receive(self):
    """Return pairs of task indices and results.

    This method waits until all tasks finish.
    """
    messages = []  # a list of (task_idx, result)
    while self.n_ongoing_tasks >= 1:
        messages.extend(self._receive_finished())
        time.sleep(0.0001)

    # sort in the order of task_idx
    messages = sorted(messages, key=itemgetter(0))

    atpbar.flush()

    return messages
def run_with_threading(nthreads=3, niterations=[5, 5, 5], time_starting_task=0):

    def task(n, name, time_starting):
        # When the starting time is long, the loop in the main thread might
        # already have ended by the time the loop in this task starts.
        time.sleep(time_starting)
        for i in atpbar(range(n), name=name):
            time.sleep(0.0001)

    threads = []
    # `atpbar` is used for this loop while `atpbar` is also used in the
    # threads launched inside it. If none of the `atpbar`s in the threads has
    # started by the end of this loop, the `atpbar` for this loop waits until
    # its own progress bar finishes updating. Otherwise, the progress bars
    # from the threads are updated together with the progress bar for this
    # loop and this `atpbar` does not wait.
    for i in atpbar(range(nthreads)):
        name = 'thread {}'.format(i)
        n = niterations[i]
        t = threading.Thread(target=task, args=(n, name, time_starting_task))
        t.start()
        threads.append(t)
        # Sleep for a while so this loop doesn't end too quickly. Without this
        # sleep, the loop could end before an `atpbar` in any of the threads
        # starts, even if `time_starting_task` is zero.
        time.sleep(0.01)
    for t in threads:
        t.join()
    flush()
            f.write(item + '\n')

    end_time = time.time()
    elapsed_time = end_time - start_time
    if elapsed_time < seconds_to_wait_between_requests:
        time.sleep(seconds_to_wait_between_requests - elapsed_time)

    return filename


reporter = find_reporter()

with Pool(processes=num_processes, initializer=register_reporter, initargs=[reporter]) as pool:
    item_part_files = pool.map(job, split_ids_to_fetch)

flush()

formatted_date = datetime.utcnow().date().strftime("%Y_%m_%d")

# concatenate the per-process part files into a single JSON file
subprocess.run(f'cat data/* > data/all_items_{formatted_date}.json', shell=True, check=True)

all_items_full_path = f'{os.getcwd()}/data/all_items_{formatted_date}.json'

try:
    # cleanup any previously staged files
    cur.execute('remove @load_db.hackernews.%items;')
    # the table stage is an implicit stage created for every table, so there is no need to create it
    # snowflake put will auto_compress by default into gz
    cur.execute(f'put file://{all_items_full_path} @load_db.hackernews.%items;')
    cur.execute('truncate load_db.hackernews.items;')