def load_pdbs_multiprocess(self, names, parallel):
    """Build and cache pdb data entries for *names*, optionally in parallel.

    Temporarily forces ``self.read_new_pdbs = True``, clears the in-memory
    caches, and (if not already held) takes the cachedir lock so new entries
    can be written. Each name is resolved through ``self._dictdb`` and built
    via ``self.build_pdb_data``.

    Args:
        names: iterable of database keys present in ``self._dictdb``.
        parallel: worker-process count; falsy means run in-process.

    Raises:
        ValueError: if the cachedir lock cannot be acquired.
        Any exception raised by a worker is re-raised via ``f.result()``.
    """
    # Save the flag and force reading of new pdbs for the duration.
    tmp = self.read_new_pdbs
    self.read_new_pdbs = True
    data = self.clear_caches()
    needs_unlock = False
    if not self._holding_lock:
        needs_unlock = True
        if not self.acquire_cachedir_lock():
            raise ValueError(
                'cachedir locked, cant write new entries.\n'
                'If no other worms jobs are running, you may manually remove:\n'
                + self.cachedirs[0] + '/lock')
    # FIX: finally-block guarantees the lock is released and caches/flag are
    # restored even when a worker raises (previously an exception in
    # f.result() left the lock held and the caches cleared).
    try:
        exe = util.InProcessExecutor()
        if parallel:
            exe = cf.ProcessPoolExecutor(max_workers=parallel)
        with exe as pool:
            futures = [
                pool.submit(self.build_pdb_data, self._dictdb[n], uselock=False)
                for n in names
            ]
            # 'progress' instead of shadowing the builtin 'iter'.
            progress = tqdm(
                cf.as_completed(futures), 'loading pdb files',
                total=len(futures))
            for f in progress:
                f.result()  # re-raise any worker exception
    finally:
        if needs_unlock:
            self.unlock_cachedir()
        self.restore_caches(data)
        self.read_new_pdbs = tmp
def worms_main_each_mergebb(
    criteria,
    precache_splices,
    merge_bblock,
    parallel,
    verbosity,
    bbs,
    pbar,
    only_merge_bblocks,
    merge_segment,
    **kw,
):
    """Fan one worms_main_protocol job out per merge-bblock and gather logs.

    Determines the merge segment (explicit argument, then the criteria's
    choice, then 0), submits one protocol run per bblock index in that
    segment, and returns the concatenated log lines from all runs.

    Args:
        parallel: worker-process count; falsy means run in-process.
        bbs: per-segment lists of building blocks; their ``_state`` is
            shipped to workers instead of the objects themselves.
        only_merge_bblocks: if truthy, restricts which bblock indices run.

    Returns:
        list[str]: accumulated log lines (padded with blanks when pbar set).
    """
    exe = util.InProcessExecutor()
    if parallel:
        exe = cf.ProcessPoolExecutor(max_workers=parallel)
    # Workers receive raw states, not BBlock objects.
    bbs_states = [[b._state for b in bb] for bb in bbs]
    # kw['db'][0].clear_bblocks()  # remove cached BBlocks
    kw["db"][0].clear()
    kw["db"][1].clear()
    with exe as pool:
        # Resolve the segment to split over: explicit > criteria > 0.
        mseg = merge_segment
        if mseg is None:
            mseg = criteria.merge_segment(**kw)
        if mseg is None:
            mseg = 0
        which = only_merge_bblocks or range(len(bbs[mseg]))
        jobs = []
        for ibb in which:
            jobs.append(
                pool.submit(
                    worms_main_protocol,
                    criteria,
                    merge_bblock=ibb,
                    parallel=0,
                    verbosity=verbosity,
                    bbs_states=bbs_states,
                    precache_splices=precache_splices,
                    pbar=pbar,
                    merge_segment=merge_segment,
                    **kw,
                ))
        log = [f"split job over merge_segment={mseg}, n = {len(jobs)}"]
        print(log[-1])
        for job in cf.as_completed(jobs):
            log.extend(job.result())
        if pbar and log:
            # Blank lines keep the progress bars from clobbering output.
            log = [""] * len(jobs) + log
        return log
def load_from_pdbs(self):
    """Load database entries from pdb files, pruning ones that are missing.

    Shuffles ``self._alldb`` (presumably to spread expensive entries across
    workers — TODO confirm), then runs ``load_from_pdbs_inner`` either
    in-process or on a thread pool of ``self.nprocs`` workers. Entries
    reported missing are removed from ``self._alldb`` and ``self._dictdb``.

    Returns:
        tuple[int, int]: (number of newly loaded entries, number missing).
    """
    shuffle(self._alldb)
    # FIX: '== 1', not 'is 1' — identity comparison with an int literal is a
    # CPython caching artifact and raises SyntaxWarning on Python 3.8+.
    if self.nprocs == 1:
        with util.InProcessExecutor() as exe:
            result = self.load_from_pdbs_inner(exe)
    else:
        with cf.ThreadPoolExecutor(max_workers=self.nprocs) as exe:
            result = self.load_from_pdbs_inner(exe)
    # Each result item is a (new_key, missing_key) pair; falsy slots mean
    # "not applicable" for that item.
    new = [r[0] for r in result if r[0]]
    missing = [r[1] for r in result if r[1]]
    for miss in missing:
        self._alldb.remove(self._dictdb[miss])
        del self._dictdb[miss]
    return len(new), len(missing)