Example #1
0
def precompute_splicedb(db, bbpairs, **kw):
    """Compute splice data for every bbpair not already cached and persist it."""
    bbdb, spdb = db

    # note: this is duplicated in edge.py and they need to be the same
    param_keys = (
        "splice_max_rms",
        "splice_ncontact_cut",
        "splice_clash_d2",
        "splice_contact_d2",
        "splice_rms_range",
        "splice_clash_contact_range",
        "splice_clash_contact_by_helix",
        "splice_ncontact_no_helix_cut",
        "splice_nhelix_contacted_cut",
        "splice_max_chain_length",
        "splice_min_dotz",
    )
    params = tuple(kw[k] for k in param_keys)

    # drop pairs whose results are already stored for these params
    bbpairs = _remove_already_cached(spdb, bbpairs, params)
    if not bbpairs:
        return

    for (name0, name1), splice in compute_splices(bbdb, bbpairs, **kw).items():
        spdb.add(params, hash_str_to_int(name0), hash_str_to_int(name1), splice)

    spdb.sync_to_disk()
    print("precompute_splicedb done")
    sys.stdout.flush()
Example #2
0
File: tmp.py  Project: willsheffler/worms
def _convert_from_pdb():
    """almost the same, dicts little slower, little bigger"""
    # one-off migration: re-key old per-pdb splice caches by string hash
    for old_path in glob.glob("/home/sheffler/.worms/cache/splices.bk/*/*.pickle"):
        pdbfile = os.path.basename(old_path).replace("__", "/")[:-7]
        pdbkey = hash_str_to_int(pdbfile)
        # NOTE(review): 5633173723268761018 is presumably the params hash — confirm
        new_path = ("/home/sheffler/.worms/cache/splices/" +
                    "%016x" % 5633173723268761018 + "/" + "%016x" % pdbkey +
                    ".pickle")
        with open(old_path, "rb") as inp:
            old_cache = _pickle.load(inp)
        converted = dict()
        for name, arrays in old_cache.items():
            assert len(arrays) == 2
            assert isinstance(arrays[0], np.ndarray)
            assert isinstance(arrays[1], np.ndarray)
            converted[hash_str_to_int(name)] = arrays
        with open(new_path, "wb") as out:
            _pickle.dump(converted, out)
Example #3
0
def _remove_already_cached(spdb, bbpairs, params):
    pairmap = defaultdict(list)
    for a, b in bbpairs:
        pairmap[a].append(b)
    ret = list()
    for pdb0, pdb1s in pairmap.items():
        pdbkey0 = hash_str_to_int(pdb0)
        if all(spdb.has(params, pdbkey0, hash_str_to_int(p1)) for p1 in pdb1s):
            continue
        listpath = spdb.listpath(params, pdbkey0)
        haveit = set()
        if os.path.exists(listpath):
            with open(listpath, "rb") as inp:
                haveit = _pickle.load(inp)
        for pdb1 in pdb1s:
            pdbkey1 = hash_str_to_int(pdb1)
            if not pdbkey1 in haveit:
                ret.append((pdb0, pdb1))
    return ret
Example #4
0
 def bblock(self, pdbkey):
    """Return the cached BBlock for pdbkey.

    str/bytes keys are hashed to int keys; a list is mapped element-wise.

    Raises:
       ValueError: if no bblock data can be loaded for the key, or the key
          type is unsupported.
    """
    if isinstance(pdbkey, (str, bytes)):
       pdbkey = hash_str_to_int(pdbkey)
    if isinstance(pdbkey, int):
       # idiomatic 'not in' (was 'not pdbkey in ...')
       if pdbkey not in self._bblock_cache:
          if not self.load_cached_bblock_into_memory(pdbkey):
             pdbfile = self._key_to_pdbfile[pdbkey]
             raise ValueError("no bblock data for key", pdbkey, pdbfile, "in", self.cachedirs)
       return self._bblock_cache[pdbkey]
    elif isinstance(pdbkey, list):
       return [self.bblock(f) for f in pdbkey]
    else:
       raise ValueError("bad pdbkey" + str(type(pdbkey)))
Example #5
0
 def bblock(self, pdbkey):
     """Return (building and caching on first use) the BBlock for pdbkey.

     Accepts a pdb file name (hashed to an int key), an int key, or a list
     of either, which is mapped element-wise.
     """
     if isinstance(pdbkey, list):
         return [self.bblock(k) for k in pdbkey]
     if isinstance(pdbkey, (str, bytes)):
         pdbkey = hash_str_to_int(pdbkey)
     assert isinstance(pdbkey, int)
     if pdbkey not in self._bblock_cache:
         # first request for this key: build the BBlock from the pose
         pdbfile = self._key_to_pdbfile[pdbkey]
         pose = self.pose(pdbfile)
         entry = self._dictdb[pdbfile]
         secstruct = Dssp(pose).get_dssp_secstruct()
         self._bblock_cache[pdbkey] = BBlock(entry, pdbfile, pdbkey, pose, secstruct)
     return self._bblock_cache[pdbkey]
Example #6
0
    def build_pdb_data(self, entry, uselock=True):
        """Ensure bblock (and optionally pose) data for a db entry is cached.

        Loads from the on-disk cache when present; otherwise, if
        self.read_new_pdbs is set, reads the pdb, builds a BBlock, and writes
        new cache files.

        Args:
            entry: db entry dict; entry['file'] is the pdb file path.
            uselock: when building new data, first verify this process holds
                the cache-dir lock.

        Returns:
            (new, missing) pair: (None, None) if already cached,
            (pdbfile, None) if new data was built, (None, pdbfile) if data
            is missing and cannot be built.

        Raises:
            ValueError: a cachefile exists (or existed) but its data cannot
                be loaded.
        """
        pdbfile = entry['file']
        pdbkey = hash_str_to_int(pdbfile)
        cachefile = self.bblockfile(pdbkey)
        posefile = self.posefile(pdbfile)
        if os.path.exists(cachefile):
            if not self.load_cached_bblock_into_memory(pdbkey):
                # distinguish a corrupt/unreadable cachefile from one that
                # disappeared between the exists() check and the load
                if os.path.exists(cachefile):
                    raise ValueError(
                        f'cachefile {cachefile} exists, but cant load data from associated key {pdbkey}'
                    )
                raise ValueError(
                    f'cachefile {cachefile} was removed, cant load data from associated key {pdbkey}'
                )
            if self.load_poses:
                if not self.load_cached_pose_into_memory(pdbfile):
                    print('warning, not saved:', pdbfile)
            return None, None  # new, missing
        elif self.read_new_pdbs:
            if uselock: self.check_lock_cachedir()
            read_pdb = False  # NOTE(review): assigned but never used
            # info('CachingBBlockDB.build_pdb_data reading %s' % pdbfile)
            pose = self.pose(pdbfile)
            ss = Dssp(pose).get_dssp_secstruct()
            bblock = BBlock(entry, pdbfile, pdbkey, pose, ss)
            self._bblock_cache[pdbkey] = bblock
            # print(cachefile)
            with open(cachefile, 'wb') as f:
                pickle.dump(bblock._state, f)
            # print('saved new bblock cache file', cachefile)
            # best-effort pose dump: failure to write is a warning, not fatal
            if not os.path.exists(posefile):
                try:
                    with open(posefile, 'wb') as f:
                        pickle.dump(pose, f)
                        info('dumped _bblock_cache files for %s' % pdbfile)
                except OSError as e:
                    print('not saving', posefile)

            if self.load_poses:
                self._poses_cache[pdbfile] = pose
            return pdbfile, None  # new, missing
        else:
            warning('no cached data for: ' + pdbfile)
            return None, pdbfile  # new, missing
Example #7
0
def _read_dbfiles(bbdb, dbfiles):
    bbdb._alldb = []
    for dbfile in dbfiles:
        with open(dbfile) as f:
            try:
                bbdb._alldb.extend(json.load(f))
            except json.decoder.JSONDecodeError as e:
                print('ERROR on json file:', dbfile)
                print(e)
                sys.exit()
    for entry in bbdb._alldb:
        if 'name' not in entry:
            entry['name'] = ''
        entry['file'] = entry['file'].replace(
            '__DATADIR__',
            os.path.relpath(os.path.dirname(__file__) + '/data'))
    bbdb._dictdb = {e['file']: e for e in bbdb._alldb}
    bbdb._key_to_pdbfile = {
        hash_str_to_int(e['file']): e['file']
        for e in bbdb._alldb
    }
Example #8
0
def _read_dbfiles(bbdb, dbfiles, dbroot=""):
   bbdb._alldb = []
   for dbfile in dbfiles:
      with open(dbfile) as f:
         try:
            bbdb._alldb.extend(json.load(f))
         except json.decoder.JSONDecodeError as e:
            print("ERROR on json file:", dbfile)
            print(e)
            sys.exit()
   for entry in bbdb._alldb:
      if "name" not in entry:
         entry["name"] = ""
      entry["file"] = entry["file"].replace("__DATADIR__",
                                            os.path.relpath(os.path.dirname(__file__) + "/data"))
   bbdb._dictdb = {e["file"]: e for e in bbdb._alldb}
   bbdb._key_to_pdbfile = {hash_str_to_int(e["file"]): e["file"] for e in bbdb._alldb}

   pdb_files_missing = False
   for entry in bbdb._alldb:
      if not os.path.exists(dbroot + entry["file"]):
         pdb_files_missing = True
         print("pdb file pdb_files_missing:", entry["file"])
   assert not pdb_files_missing
Example #9
0
def BBlock(entry, pdbfile, filehash, pose, ss):
    """Build a _BBlock record from a db entry and its pose.

    Args:
        entry: db entry dict (connections, file, components, protocol, ...).
        pdbfile: path of the source pdb file (used for error reporting).
        filehash: int hash of the pdb file name.
        pose: pose object; coordinates/chain bounds are extracted from it.
        ss: dssp secondary-structure string, one char per residue.

    Returns:
        A populated _BBlock, or the (None, pdbfile) "missing" pair when the
        entry has no usable connection info.
    """
    json = dumps(entry)
    chains = util.get_chain_bounds(pose)
    ss = np.frombuffer(ss.encode(), dtype='i1')
    ncac = util.get_bb_coords(pose)
    cb = util.get_cb_coords(pose)
    stubs = _ncac_to_stubs(ncac)
    com = np.mean(cb, axis=0)
    rg = np.sqrt(np.sum((cb - com)**2) / len(cb))

    assert len(pose) == len(ncac)
    assert len(pose) == len(stubs)
    assert len(pose) == len(ss)
    conn = _make_connections_array(entry['connections'], chains)
    # BUGFIX: use '==' instead of 'is' for int comparisons below; 'is' on
    # int literals relies on CPython small-int caching and warns on 3.8+
    if len(conn) == 0:
        print('bad conn info!', pdbfile)
        return None, pdbfile  # new, missing
    # pad N-CA-C coords to homogeneous (x, y, z, 1) if needed
    if ncac.shape[-1] == 4:
        ncac = ncac.astype(np.float64)
    elif ncac.shape[-1] == 3:
        tmp = np.ones((ncac.shape[0], 3, 4), dtype=np.float64)
        tmp[..., :3] = ncac
        ncac = tmp
    else:
        assert 0, 'bad ncac'
    assert cb.shape == (len(pose), 4)

    if entry['base'] not in ('', 'n/a'):
        basehash = hash_str_to_int(entry['base'])
    else:
        basehash = 0

    def npfb(s):
        # store strings as int8 numpy buffers (lists become '[a,b,...]')
        if isinstance(s, list):
            s = '[' + ','.join(s) + ']'
        return np.frombuffer(s.encode(), dtype='i1')

    bblock = _BBlock(
        json=npfb(json),
        connections=conn,
        file=npfb(entry['file']),
        filehash=filehash,
        components=npfb(str(entry['components'])),
        protocol=npfb(entry['protocol']),
        name=npfb(entry['name']),
        classes=npfb(','.join(entry['class'])),
        validated=entry['validated'],
        _type=npfb(entry['type']),
        base=npfb(entry['base']),
        basehash=basehash,
        ncac=np.ascontiguousarray(ncac),
        cb=np.ascontiguousarray(cb),
        chains=np.array(chains, dtype='i4'),
        ss=ss,
        stubs=np.ascontiguousarray(stubs.astype('f8')),
        com=com,
        rg=rg,
    )

    return bblock
Example #10
0
def BBlock(entry, pdbfile, filehash, pose, ss, null_base_names, **kw):
   """Build a _BBlock record from a db entry and its pose.

   Args:
      entry: db entry dict (connections, file, components, protocol, ...).
      pdbfile: path of the source pdb file (used for error reporting).
      filehash: int hash of the pdb file name.
      pose: pose object; coordinates/chain bounds are extracted from it.
      ss: dssp secondary-structure string, one char per residue.
      null_base_names: base names treated as "no base" (basehash 0).
      **kw: unused here; accepted for caller convenience.

   Returns:
      A populated _BBlock.
   """
   json = dumps(entry)
   chains = util.get_chain_bounds(pose)
   ss = np.frombuffer(ss.encode(), dtype="i1")
   ncac = util.get_bb_coords(pose)
   cb = util.get_cb_coords(pose)
   stubs = _ncac_to_stubs(ncac)
   com = np.mean(cb, axis=0)
   rg = np.sqrt(np.sum((cb - com)**2) / len(cb))

   assert len(pose) == len(ncac)
   assert len(pose) == len(stubs)
   assert len(pose) == len(ss)
   conn = _make_connections_array(entry["connections"], chains)
   # BUGFIX: use '==' instead of 'is' for int comparisons below; 'is' on
   # int literals relies on CPython small-int caching and warns on 3.8+
   if len(conn) == 0:
      print("bad conn info!", pdbfile)
      assert 0
      return None, pdbfile  # new, missing (unreachable after assert 0)
   # pad N-CA-C coords to homogeneous (x, y, z, 1) if needed
   if ncac.shape[-1] == 4:
      ncac = ncac.astype(np.float64)
   elif ncac.shape[-1] == 3:
      tmp = np.ones((ncac.shape[0], 3, 4), dtype=np.float64)
      tmp[..., :3] = ncac
      ncac = tmp
   else:
      assert 0, "bad ncac"
   assert cb.shape == (len(pose), 4)

   if entry["base"] in null_base_names: basehash = 0
   else: basehash = hash_str_to_int(entry["base"])

   def npfb(s):
      # store strings as int8 numpy buffers (lists become '[a,b,...]')
      if isinstance(s, list):
         s = "[" + ",".join(s) + "]"
      return np.frombuffer(s.encode(), dtype="i1")

   validated = entry["validated"]
   if validated in ("na", "NA"):
      validated = False

   bblock = _BBlock(
      json=npfb(json),
      connections=conn,
      file=npfb(entry["file"]),
      filehash=filehash,
      components=npfb(str(entry["components"])),
      protocol=npfb(entry["protocol"]),
      name=npfb(entry["name"]),
      classes=npfb(",".join(entry["class"])),
      validated=validated,
      _type=npfb(entry["type"]),
      base=npfb(entry["base"]),
      basehash=basehash,
      ncac=np.ascontiguousarray(ncac),
      cb=np.ascontiguousarray(cb),
      chains=np.array(chains, dtype="i4"),
      ss=ss,
      stubs=np.ascontiguousarray(stubs.astype("f8")),
      com=com,
      rg=rg,
   )

   return bblock