# Shared stdlib imports for the snippets below; project-local helpers
# (hash_str_to_int, compute_splices, Dssp, BBlock, _BBlock, util, etc.)
# are assumed to be imported elsewhere in the package.
import glob
import os
import sys
import json
import pickle
import _pickle
from collections import defaultdict
from json import dumps
from logging import info, warning

import numpy as np


def precompute_splicedb(db, bbpairs, **kw):
    bbdb, spdb = db
    # note: this param tuple is duplicated in edge.py and they need to be the same
    params = (
        kw["splice_max_rms"],
        kw["splice_ncontact_cut"],
        kw["splice_clash_d2"],
        kw["splice_contact_d2"],
        kw["splice_rms_range"],
        kw["splice_clash_contact_range"],
        kw["splice_clash_contact_by_helix"],
        kw["splice_ncontact_no_helix_cut"],
        kw["splice_nhelix_contacted_cut"],
        kw["splice_max_chain_length"],
        kw["splice_min_dotz"],
    )
    # only compute pairs not already present in the splice cache
    bbpairs = _remove_already_cached(spdb, bbpairs, params)
    if not bbpairs:
        return
    splices = compute_splices(bbdb, bbpairs, **kw)
    for key, val in splices.items():
        pdbkey0 = hash_str_to_int(key[0])
        pdbkey1 = hash_str_to_int(key[1])
        spdb.add(params, pdbkey0, pdbkey1, val)
    spdb.sync_to_disk()
    print("precompute_splicedb done")
    sys.stdout.flush()
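# The params tuple above doubles as the cache key, so its field order is a
# contract shared with the copy in edge.py (per the note in the function).
# A sketch of how both copies could be derived from one shared ordering;
# SPLICE_PARAM_KEYS and splice_params_key are hypothetical names, not part
# of the existing code.
SPLICE_PARAM_KEYS = (
    "splice_max_rms",
    "splice_ncontact_cut",
    "splice_clash_d2",
    "splice_contact_d2",
    "splice_rms_range",
    "splice_clash_contact_range",
    "splice_clash_contact_by_helix",
    "splice_ncontact_no_helix_cut",
    "splice_nhelix_contacted_cut",
    "splice_max_chain_length",
    "splice_min_dotz",
)

def splice_params_key(kw):
    # build the cache key from the single shared ordering above
    return tuple(kw[k] for k in SPLICE_PARAM_KEYS)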
def _convert_from_pdb():
    """Convert old string-keyed splice caches to int-keyed ones.

    The two formats are almost the same; the dict version is a little
    slower and a little bigger.
    """
    for f in glob.glob("/home/sheffler/.worms/cache/splices.bk/*/*.pickle"):
        pdbfile = os.path.basename(f).replace("__", "/")[:-7]  # strip '.pickle'
        pdbkey = hash_str_to_int(pdbfile)
        newf = ("/home/sheffler/.worms/cache/splices/" +
                "%016x" % 5633173723268761018 + "/" +
                "%016x" % pdbkey + ".pickle")
        newcache = dict()
        with open(f, "rb") as inp:
            cache = _pickle.load(inp)
        for k0, v0 in cache.items():
            assert len(v0) == 2
            assert isinstance(v0[0], np.ndarray)
            assert isinstance(v0[1], np.ndarray)
            newcache[hash_str_to_int(k0)] = v0
        with open(newf, "wb") as out:
            _pickle.dump(newcache, out)
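# A minimal sketch of the hash_str_to_int helper used throughout: the
# project's actual hash may differ, but any stable string -> 64-bit-int
# mapping fits the "%016x" cache-file naming used in _convert_from_pdb
# (unlike Python's builtin hash(), which is salted per process).
import hashlib

def hash_str_to_int_sketch(s):
    if isinstance(s, str):
        s = s.encode()
    # truncate sha1 to an unsigned 64-bit integer
    return int.from_bytes(hashlib.sha1(s).digest()[:8], "big")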
def _remove_already_cached(spdb, bbpairs, params):
    # group pairs by the first pdb so each cache list is read only once
    pairmap = defaultdict(list)
    for a, b in bbpairs:
        pairmap[a].append(b)
    ret = list()
    for pdb0, pdb1s in pairmap.items():
        pdbkey0 = hash_str_to_int(pdb0)
        if all(spdb.has(params, pdbkey0, hash_str_to_int(p1)) for p1 in pdb1s):
            continue
        listpath = spdb.listpath(params, pdbkey0)
        haveit = set()
        if os.path.exists(listpath):
            with open(listpath, "rb") as inp:
                haveit = _pickle.load(inp)
        for pdb1 in pdb1s:
            pdbkey1 = hash_str_to_int(pdb1)
            if pdbkey1 not in haveit:
                ret.append((pdb0, pdb1))
    return ret
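# A minimal in-memory sketch of the splice-cache interface that
# precompute_splicedb and _remove_already_cached rely on. Only the call
# signatures (has / listpath / add / sync_to_disk) are taken from the code
# above; the class name and on-disk layout are assumptions for illustration.
import os
import pickle

class SpliceDBSketch:
    def __init__(self, cachedir):
        self.cachedir = cachedir
        self._mem = {}  # (params, pdbkey0) -> {pdbkey1: splices}

    def listpath(self, params, pdbkey0):
        # pickled set of partner keys already computed for pdbkey0
        return os.path.join(self.cachedir, "%016x.list" % pdbkey0)

    def has(self, params, pdbkey0, pdbkey1):
        return pdbkey1 in self._mem.get((params, pdbkey0), {})

    def add(self, params, pdbkey0, pdbkey1, splices):
        self._mem.setdefault((params, pdbkey0), {})[pdbkey1] = splices

    def sync_to_disk(self):
        os.makedirs(self.cachedir, exist_ok=True)
        for (params, pdbkey0), partners in self._mem.items():
            with open(self.listpath(params, pdbkey0), "wb") as out:
                pickle.dump(set(partners), out)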
def bblock(self, pdbkey):
    if isinstance(pdbkey, (str, bytes)):
        pdbkey = hash_str_to_int(pdbkey)
    if isinstance(pdbkey, int):
        if pdbkey not in self._bblock_cache:
            if not self.load_cached_bblock_into_memory(pdbkey):
                pdbfile = self._key_to_pdbfile[pdbkey]
                raise ValueError(
                    f"no bblock data for key {pdbkey} ({pdbfile}) in {self.cachedirs}")
        return self._bblock_cache[pdbkey]
    elif isinstance(pdbkey, list):
        return [self.bblock(f) for f in pdbkey]
    else:
        raise ValueError("bad pdbkey type: " + str(type(pdbkey)))
def bblock(self, pdbkey):
    if isinstance(pdbkey, list):
        return [self.bblock(f) for f in pdbkey]
    if isinstance(pdbkey, (str, bytes)):
        pdbkey = hash_str_to_int(pdbkey)
    assert isinstance(pdbkey, int)
    if pdbkey not in self._bblock_cache:
        # no disk cache here: build the BBlock from the pose on demand
        pdbfile = self._key_to_pdbfile[pdbkey]
        pose = self.pose(pdbfile)
        entry = self._dictdb[pdbfile]
        ss = Dssp(pose).get_dssp_secstruct()
        bblock = BBlock(entry, pdbfile, pdbkey, pose, ss)
        self._bblock_cache[pdbkey] = bblock
    return self._bblock_cache[pdbkey]
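# Usage sketch for the two bblock accessors above (caching and non-caching):
# both accept a pdb path, a precomputed int key, or a list of either, so
# callers never need to hash paths themselves. `db` stands for either class.
#
#   bb = db.bblock("path/to/structure.pdb")                  # path, hashed internally
#   bb = db.bblock(hash_str_to_int("path/to/structure.pdb")) # same entry by int key
#   bbs = db.bblock(["a.pdb", "b.pdb"])                      # list in, list out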
def build_pdb_data(self, entry, uselock=True):
    """Load or build cached data for one entry; returns (new, missing) pdbfile flags."""
    pdbfile = entry['file']
    pdbkey = hash_str_to_int(pdbfile)
    cachefile = self.bblockfile(pdbkey)
    posefile = self.posefile(pdbfile)
    if os.path.exists(cachefile):
        if not self.load_cached_bblock_into_memory(pdbkey):
            if os.path.exists(cachefile):
                raise ValueError(
                    f'cachefile {cachefile} exists, but cannot load data from associated key {pdbkey}')
            raise ValueError(
                f'cachefile {cachefile} was removed, cannot load data from associated key {pdbkey}')
        if self.load_poses:
            if not self.load_cached_pose_into_memory(pdbfile):
                print('warning, not saved:', pdbfile)
        return None, None  # new, missing
    elif self.read_new_pdbs:
        if uselock:
            self.check_lock_cachedir()
        # info('CachingBBlockDB.build_pdb_data reading %s' % pdbfile)
        pose = self.pose(pdbfile)
        ss = Dssp(pose).get_dssp_secstruct()
        bblock = BBlock(entry, pdbfile, pdbkey, pose, ss)
        self._bblock_cache[pdbkey] = bblock
        with open(cachefile, 'wb') as f:
            pickle.dump(bblock._state, f)
        if not os.path.exists(posefile):
            try:
                with open(posefile, 'wb') as f:
                    pickle.dump(pose, f)
                info('dumped _bblock_cache files for %s' % pdbfile)
            except OSError:
                print('not saving', posefile)
        if self.load_poses:
            self._poses_cache[pdbfile] = pose
        return pdbfile, None  # new, missing
    else:
        warning('no cached data for: ' + pdbfile)
        return None, pdbfile  # new, missing
def _read_dbfiles(bbdb, dbfiles):
    bbdb._alldb = []
    for dbfile in dbfiles:
        with open(dbfile) as f:
            try:
                bbdb._alldb.extend(json.load(f))
            except json.decoder.JSONDecodeError as e:
                print('ERROR on json file:', dbfile)
                print(e)
                sys.exit()
    for entry in bbdb._alldb:
        if 'name' not in entry:
            entry['name'] = ''
        entry['file'] = entry['file'].replace(
            '__DATADIR__',
            os.path.relpath(os.path.dirname(__file__) + '/data'))
    bbdb._dictdb = {e['file']: e for e in bbdb._alldb}
    bbdb._key_to_pdbfile = {
        hash_str_to_int(e['file']): e['file']
        for e in bbdb._alldb
    }
def _read_dbfiles(bbdb, dbfiles, dbroot=""): bbdb._alldb = [] for dbfile in dbfiles: with open(dbfile) as f: try: bbdb._alldb.extend(json.load(f)) except json.decoder.JSONDecodeError as e: print("ERROR on json file:", dbfile) print(e) sys.exit() for entry in bbdb._alldb: if "name" not in entry: entry["name"] = "" entry["file"] = entry["file"].replace("__DATADIR__", os.path.relpath(os.path.dirname(__file__) + "/data")) bbdb._dictdb = {e["file"]: e for e in bbdb._alldb} bbdb._key_to_pdbfile = {hash_str_to_int(e["file"]): e["file"] for e in bbdb._alldb} pdb_files_missing = False for entry in bbdb._alldb: if not os.path.exists(dbroot + entry["file"]): pdb_files_missing = True print("pdb file pdb_files_missing:", entry["file"]) assert not pdb_files_missing
def BBlock(entry, pdbfile, filehash, pose, ss):
    json = dumps(entry)
    chains = util.get_chain_bounds(pose)
    ss = np.frombuffer(ss.encode(), dtype='i1')
    ncac = util.get_bb_coords(pose)
    cb = util.get_cb_coords(pose)
    stubs = _ncac_to_stubs(ncac)
    com = np.mean(cb, axis=0)
    rg = np.sqrt(np.sum((cb - com)**2) / len(cb))
    assert len(pose) == len(ncac)
    assert len(pose) == len(stubs)
    assert len(pose) == len(ss)
    conn = _make_connections_array(entry['connections'], chains)
    if len(conn) == 0:
        print('bad conn info!', pdbfile)
        return None, pdbfile  # new, missing
    # pad ncac to homogeneous coordinates if needed
    if ncac.shape[-1] == 4:
        ncac = ncac.astype(np.float64)
    elif ncac.shape[-1] == 3:
        tmp = np.ones((ncac.shape[0], 3, 4), dtype=np.float64)
        tmp[..., :3] = ncac
        ncac = tmp
    else:
        assert 0, 'bad ncac'
    assert cb.shape == (len(pose), 4)
    if entry['base'] not in ('', 'n/a'):
        basehash = hash_str_to_int(entry['base'])
    else:
        basehash = 0

    def npfb(s):
        if isinstance(s, list):
            s = '[' + ','.join(s) + ']'
        return np.frombuffer(s.encode(), dtype='i1')

    bblock = _BBlock(
        json=npfb(json),
        connections=conn,
        file=npfb(entry['file']),
        filehash=filehash,
        components=npfb(str(entry['components'])),
        protocol=npfb(entry['protocol']),
        name=npfb(entry['name']),
        classes=npfb(','.join(entry['class'])),
        validated=entry['validated'],
        _type=npfb(entry['type']),
        base=npfb(entry['base']),
        basehash=basehash,
        ncac=np.ascontiguousarray(ncac),
        cb=np.ascontiguousarray(cb),
        chains=np.array(chains, dtype='i4'),
        ss=ss,
        stubs=np.ascontiguousarray(stubs.astype('f8')),
        com=com,
        rg=rg,
    )
    return bblock
def BBlock(entry, pdbfile, filehash, pose, ss, null_base_names, **kw):
    json = dumps(entry)
    chains = util.get_chain_bounds(pose)
    ss = np.frombuffer(ss.encode(), dtype="i1")
    ncac = util.get_bb_coords(pose)
    cb = util.get_cb_coords(pose)
    stubs = _ncac_to_stubs(ncac)
    com = np.mean(cb, axis=0)
    rg = np.sqrt(np.sum((cb - com)**2) / len(cb))
    assert len(pose) == len(ncac)
    assert len(pose) == len(stubs)
    assert len(pose) == len(ss)
    conn = _make_connections_array(entry["connections"], chains)
    if len(conn) == 0:
        print("bad conn info!", pdbfile)
        assert 0
        return None, pdbfile  # new, missing (unreachable after the assert)
    # pad ncac to homogeneous coordinates if needed
    if ncac.shape[-1] == 4:
        ncac = ncac.astype(np.float64)
    elif ncac.shape[-1] == 3:
        tmp = np.ones((ncac.shape[0], 3, 4), dtype=np.float64)
        tmp[..., :3] = ncac
        ncac = tmp
    else:
        assert 0, "bad ncac"
    assert cb.shape == (len(pose), 4)
    if entry["base"] in null_base_names:
        basehash = 0
    else:
        basehash = hash_str_to_int(entry["base"])

    def npfb(s):
        if isinstance(s, list):
            s = "[" + ",".join(s) + "]"
        return np.frombuffer(s.encode(), dtype="i1")

    validated = entry["validated"]
    if validated in ("na", "NA"):
        validated = False
    bblock = _BBlock(
        json=npfb(json),
        connections=conn,
        file=npfb(entry["file"]),
        filehash=filehash,
        components=npfb(str(entry["components"])),
        protocol=npfb(entry["protocol"]),
        name=npfb(entry["name"]),
        classes=npfb(",".join(entry["class"])),
        validated=validated,
        _type=npfb(entry["type"]),
        base=npfb(entry["base"]),
        basehash=basehash,
        ncac=np.ascontiguousarray(ncac),
        cb=np.ascontiguousarray(cb),
        chains=np.array(chains, dtype="i4"),
        ss=ss,
        stubs=np.ascontiguousarray(stubs.astype("f8")),
        com=com,
        rg=rg,
    )
    return bblock
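# A minimal sketch of what _ncac_to_stubs could compute: one homogeneous 4x4
# frame ("stub") per residue, built from the N, CA, C coordinates by
# Gram-Schmidt. The real implementation's axis convention may differ; this
# illustrates the technique only.
import numpy as np

def ncac_to_stubs_sketch(ncac):
    # ncac: (nres, 3, 4) homogeneous N, CA, C coordinates per residue
    n, ca, c = ncac[:, 0, :3], ncac[:, 1, :3], ncac[:, 2, :3]
    e1 = n - ca
    e1 /= np.linalg.norm(e1, axis=1, keepdims=True)
    e3 = np.cross(e1, c - ca)
    e3 /= np.linalg.norm(e3, axis=1, keepdims=True)
    e2 = np.cross(e3, e1)  # completes a right-handed orthonormal basis
    stubs = np.zeros((len(ncac), 4, 4))
    stubs[:, :3, 0] = e1
    stubs[:, :3, 1] = e2
    stubs[:, :3, 2] = e3
    stubs[:, :3, 3] = ca  # origin at the CA atom
    stubs[:, 3, 3] = 1.0
    return stubs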