def shukla_coords(trajectories,KER,Aloop,SRC2):
    """Collect activation-loop RMSD and KER distance difference for all frames.

    For each trajectory two residue-residue distances are computed with
    ``md.compute_contacts`` (residue pairs ``KER[0]`` and ``KER[1]``) and the
    difference "second minus first" is stored.  The backbone atoms of residues
    ``Aloop[0]`` to ``Aloop[1]`` of the trajectory are then compared by RMSD
    with the backbone atoms of residues 140 to 160 of frame 0 of ``SRC2``.

    Both quantities are multiplied by 10 (mdtraj works in nm, so this is
    presumably a conversion to Angstrom).

    Returns
    -------
    list
        ``[rmsd, difference]`` as two numpy arrays that hold the values of
        all trajectories one after the other.
    """
    all_differences = []
    all_rmsds = []

    for traj in trajectories:
        # Element 0 of the compute_contacts result holds the distances.
        first_pair = md.compute_contacts(traj, [KER[0]])
        second_pair = md.compute_contacts(traj, [KER[1]])
        # Extending with an array appends its rows one by one, which is the
        # same flattening the list of per-trajectory arrays would get later.
        all_differences.extend(10 * (second_pair[0] - first_pair[0]))

        reference_atoms = SRC2.top.select(
            "backbone and (resid %s to %s)" % (140, 160))
        loop_atoms = traj.top.select(
            "backbone and (resid %s to %s)" % (Aloop[0], Aloop[1]))
        reference_loop = SRC2.atom_slice(reference_atoms)
        traj_loop = traj.atom_slice(loop_atoms)
        all_rmsds.extend(10 * md.rmsd(traj_loop, reference_loop, frame=0))

    return [np.asarray(all_rmsds), np.asarray(all_differences)]
def test_trek():
    """Run the ``stp`` and ``ctr`` post-processing steps for 'trek' and check their output."""
    # setup
    with open("processed/p9761/24/7/info.json") as handle:
        info = json.load(handle)

    # stp step: the scratch directory must be gone afterwards
    info = trajprocess.postprocess.stp(info, 'trek')
    stp_scratch = '{workdir}/stp/0/'.format(**info['path'])
    assert not os.path.exists(stp_scratch)

    # stp results
    traj = mdtraj.load(info['stp']['gens'][0], top=info['stp']['outtop'])
    assert traj.n_atoms == 30962
    assert len(traj) == 7

    # ctr step: no temporary cpptraj files may be left behind
    info = trajprocess.postprocess.ctr(info, "trek")
    for leftover in ("{workdir}/cpptraj.tmp", "{workdir}/ctr/cpptraj.tmp"):
        assert not os.path.exists(leftover.format(**info['path']))

    traj2 = mdtraj.load(info['ctr']['gens'][0], top=info['stp']['outtop'])

    # ctr results: residue-residue distances must agree with the stp output.
    # Trek has 518 protein residues
    pairs = np.random.randint(0, 518, (20, 2))
    cont1 = mdtraj.compute_contacts(traj, pairs)[0]
    cont2 = mdtraj.compute_contacts(traj2, pairs)[0]
    np.testing.assert_array_almost_equal(cont1, cont2, decimal=4)
def test_contact_0():
    """Check every contact scheme against the stored reference distances for BPTI."""
    pdb = md.load(get_fn('bpti.pdb'))
    contacts = np.loadtxt(get_fn('contacts.dat')).astype(int)

    # (scheme name, reference data file)
    cases = (
        ('ca', 'cc_ca.dat'),
        ('closest', 'cc_closest.dat'),
        ('closest-heavy', 'cc_closest-heavy.dat'),
        ('sidechain', 'cc_sidechain.dat'),
        ('sidechain-heavy', 'cc_sidechain-heavy.dat'),
    )

    computed = [md.compute_contacts(pdb, contacts, scheme=scheme)
                for scheme, _ in cases]
    references = [np.loadtxt(get_fn(ref_file)) for _, ref_file in cases]

    # distances must match the reference values ...
    for reference, (distances, _) in zip(references, computed):
        eq(reference, distances.flatten())

    # ... and the returned residue pairs must be the ones requested
    for _, returned_pairs in computed:
        eq(contacts, returned_pairs)
def test_contact_0(get_fn):
    """Check every contact scheme against the stored reference distances for BPTI."""
    pdb = md.load(get_fn('bpti.pdb'))
    contacts = np.loadtxt(get_fn('contacts.dat')).astype(int)

    schemes = ('ca', 'closest', 'closest-heavy', 'sidechain', 'sidechain-heavy')
    reference_files = ('cc_ca.dat', 'cc_closest.dat', 'cc_closest-heavy.dat',
                       'cc_sidechain.dat', 'cc_sidechain-heavy.dat')

    # Compute everything first, then load the references, then compare.
    results = []
    for scheme in schemes:
        results.append(md.compute_contacts(pdb, contacts, scheme=scheme))

    references = []
    for reference_file in reference_files:
        references.append(np.loadtxt(get_fn(reference_file)))

    for reference, result in zip(references, results):
        eq(reference, result[0].flatten())

    for result in results:
        eq(contacts, result[1])
def shukla_coords(trajectories, KER, Aloop, SRC2):
    """Compute activation-loop RMSD and KER distance difference per trajectory.

    For each trajectory the distances of residue pairs ``KER[0]`` and
    ``KER[1]`` are computed with ``md.compute_contacts`` and the difference
    "second minus first" is stored.  The backbone of residues ``Aloop[0]`` to
    ``Aloop[1]`` is sliced out of both the trajectory and ``SRC2`` and the
    RMSD to frame 0 of the sliced ``SRC2`` is stored.

    Both values are multiplied by 10 (mdtraj works in nm, so this is
    presumably a conversion to Angstrom).

    Returns
    -------
    list
        ``[rmsd, difference]``; each is a list with one array per trajectory.
    """
    loop_query = "backbone and (resid %s to %s)"
    per_traj_difference = []
    per_traj_rmsd = []

    for traj in trajectories:
        # Element 0 of the compute_contacts result holds the distances.
        dist_first = md.compute_contacts(traj, [KER[0]])[0]
        dist_second = md.compute_contacts(traj, [KER[1]])[0]
        per_traj_difference.append(10 * (dist_second - dist_first))

        reference_atoms = SRC2.top.select(loop_query % (Aloop[0], Aloop[1]))
        loop_atoms = traj.top.select(loop_query % (Aloop[0], Aloop[1]))
        reference_loop = SRC2.atom_slice(reference_atoms)
        traj_loop = traj.atom_slice(loop_atoms)
        per_traj_rmsd.append(10 * md.rmsd(traj_loop, reference_loop, frame=0))

    return [per_traj_rmsd, per_traj_difference]
def test_Residue_Mindist_Ca_array_periodic(self):
    """The featurizer must honour ``periodic=False`` for residue min-distances."""
    traj = mdtraj.load(pdbfile)

    # Atoms that are farthest apart along Z, and the residues they belong to
    lowest_z_atom = traj.xyz.argmin(1).squeeze()[-1]
    highest_z_atom = traj.xyz.argmax(1).squeeze()[-1]
    topology = traj.topology
    contacts = np.array([[topology.atom(lowest_z_atom).residue.index,
                          topology.atom(highest_z_atom).residue.index]])

    # Give the trajectory a (bogus) unit cell, otherwise traj._have_unitcell is False
    traj.unitcell_angles = [90, 90, 90]
    traj.unitcell_lengths = [1, 1, 1]

    self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts, periodic=False)
    D = self.feat.transform(traj)

    Dperiodic_true, _ = mdtraj.compute_contacts(
        traj, scheme='ca', contacts=contacts, periodic=True)
    Dperiodic_false, _ = mdtraj.compute_contacts(
        traj, scheme='ca', contacts=contacts, periodic=False)

    # The periodic option must have an effect at all ...
    assert not np.allclose(Dperiodic_false, Dperiodic_true)
    # ... and the featurizer must reproduce the non-periodic result
    assert np.allclose(D, Dperiodic_false)
    assert len(self.feat.describe()) == self.feat.dimension()
def catkhrd(trajectories):
    """Collect two C-alpha residue-residue distances over all trajectories.

    The residue index pairs are (120, 151) and (29, 155); judging by the
    original variable names these presumably correspond to H188-S218 and
    K97-S222.  Distances are returned as mdtraj reports them (no scaling).

    Returns
    -------
    list
        Two numpy arrays, one per residue pair, holding the per-frame
        distances of all trajectories one after the other.
    """
    h188s218_rows = []
    k97s222_rows = []

    for traj in trajectories:
        # Element 0 of the compute_contacts result holds the distances;
        # extending with it appends one row per frame.
        h188s218_rows.extend(
            md.compute_contacts(traj, [[120, 151]], scheme='ca')[0])
        k97s222_rows.extend(
            md.compute_contacts(traj, [[29, 155]], scheme='ca')[0])

    return [np.asarray(h188s218_rows), np.asarray(k97s222_rows)]
def shukla_coords_byrun(files,KER,Aloop,SRC2):
    """Compute activation-loop RMSD and KER distance difference grouped by run.

    The directory is taken from ``files`` (everything before the first
    ``*``).  Run numbers are discovered from the ``*clone0.h5`` files in that
    directory via the ``run<N>`` part of the file name; file names without
    such a part are skipped.  For every run the trajectories matching
    ``run<N>-clone*1.h5`` are loaded and, per frame,

    * the difference between the ``KER[1]`` and ``KER[0]`` residue-pair
      distances, and
    * the backbone RMSD of residues ``Aloop[0]`` to ``Aloop[1]`` against
      frame 0 of ``SRC2``

    are computed.  Both are multiplied by 10 (mdtraj works in nm, so this is
    presumably a conversion to Angstrom).

    Returns
    -------
    list
        ``[rmsd, difference]``; each is a list with one flattened numpy
        array per run, ordered by ascending run number.
    """
    path_base = files.split('*')[0]

    # Discover the distinct run numbers.
    run_numbers = set()
    for filename in glob("%s/*clone0.h5" % path_base):
        match = re.search('run([^-]+)', filename)
        if match is None:
            # Not a run file; the original crashed here with AttributeError.
            continue
        run_numbers.add(int(match.group(1)))

    difference = []
    rmsd = []
    loop_query = "backbone and (resid %s to %s)"

    for run in sorted(run_numbers):
        trajectories = dataset.MDTrajDataset("%s/run%d-clone*1.h5" % (path_base,run))
        # Single pre-formatted argument: prints the same on Python 2 and 3.
        print("Run %s has %s trajectories." % (run,len(trajectories)))

        run_difference = []
        run_rmsd = []
        for traj in trajectories:
            k295e310 = md.compute_contacts(traj, [KER[0]])
            e310r409 = md.compute_contacts(traj, [KER[1]])
            # extend() appends one row per frame, i.e. flattens across trajectories
            run_difference.extend(10*(e310r409[0] - k295e310[0]))

            Activation_Loop_SRC2 = SRC2.top.select(loop_query % (Aloop[0],Aloop[1]))
            Activation_Loop_kinase = traj.top.select(loop_query % (Aloop[0],Aloop[1]))
            SRC2_cut = SRC2.atom_slice(Activation_Loop_SRC2)
            traj_cut = traj.atom_slice(Activation_Loop_kinase)
            run_rmsd.extend(10*(md.rmsd(traj_cut,SRC2_cut,frame=0)))

        difference.append(np.asarray(run_difference))
        rmsd.append(np.asarray(run_rmsd))

    return [rmsd, difference]
def read_and_featurize(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, resSeq_pairs = None, iterative = True):
    """Compute dihedral and residue-contact features for one trajectory file.

    Parameters
    ----------
    traj_file : str
        Trajectory file readable by mdtraj.
    features_dir, condition
        Not used by the code in this block.
    dihedral_types : sequence of str
        Any of "phi", "psi", "chi1", "chi2"; other entries are ignored.
    dihedral_residues : sequence or None
        Residues for which dihedral indices are built.  ``None`` or an empty
        sequence skips the dihedral features.
    resSeq_pairs : sequence or None
        Residue pairs (by resSeq) whose closest-heavy-atom distance is
        computed.  ``None`` or an empty sequence skips the contact features.
    iterative : bool
        If true the trajectory is read in chunks of 1000 frames, otherwise
        it is loaded in one go.

    Returns
    -------
    None
        NOTE(review): the code visible here computes ``manual_features`` and
        ``contact_features`` but neither returns nor stores them; the
        function looks truncated - confirm against the full file.
    """
    # The defaults are None; the original called len() on them directly.
    if dihedral_residues is None:
        dihedral_residues = []
    if resSeq_pairs is None:
        resSeq_pairs = []

    # len() instead of truthiness so that numpy arrays are accepted as well.
    if len(dihedral_residues) > 0:
        # `top` and `traj` are locals of this function and were read here
        # before any assignment; load them up front.
        top = md.load_frame(traj_file, index=0).topology
        traj = md.load(traj_file)

        dihedral_indices = []
        for dihedral_type in dihedral_types:
            if dihedral_type == "phi":
                dihedral_indices.append(phi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "psi":
                dihedral_indices.append(psi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi1":
                dihedral_indices.append(chi1_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi2":
                dihedral_indices.append(chi2_indices(fix_topology(top), dihedral_residues))

        # Each angle set contributes its sine and its cosine.
        dihedral_angles = []
        for indices in dihedral_indices:
            angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj, indices=indices))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))

        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if len(resSeq_pairs) > 0:
        top = md.load_frame(traj_file, index=0).topology
        resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
        if iterative:
            try:
                contact_features = []
                for chunk in md.iterload(traj_file, chunk = 1000):
                    chunk_features = md.compute_contacts(chunk, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
                    print(np.shape(chunk_features))
                    contact_features.append(chunk_features)
                contact_features = np.concatenate(contact_features)
            except Exception as e:
                # Best effort: report the problem and give up on this file.
                print(str(e))
                print("Failed")
                return
        else:
            try:
                traj = md.load(traj_file)
                contact_features = md.compute_contacts(traj, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
            except Exception as e:
                print(str(e))
                print("Failed for traj")
                return
def __init__(self, native, group1, group2, ca_cutoff_angstroms=10., verbose=True):
    """Find the interface between two atom groups in the native structure.

    Parameters
    ----------
    native : mdtraj.Trajectory
        Native structure; only its first frame is used.
    group1, group2 : iterable of int
        Atom indices of the two groups.
    ca_cutoff_angstroms : float
        Two residues (one from each group) are in contact when their
        C-alpha distance is below this value.  The mdtraj distance is
        multiplied by 10 before the comparison.
    verbose : bool
        Print the number of interface residues (total and per group).

    Sets ``self.native`` and ``self.interface_atom_indices`` (indices of all
    atoms that belong to an interface residue).
    """
    self.native = native[0]  # ensure only a single frame is passed

    # Sorted, unique residue indices of each atom group.
    res_group1, res_group2 = [
        np.array(sorted(set(native.topology.atom(i).residue.index for i in g)))
        for g in (group1, group2)
    ]
    contact_pairs = np.array([(i, j) for i in res_group1 for j in res_group2])

    # Only frame 0 is evaluated, so compute the distances for that frame only.
    ca_distances = 10. * md.compute_contacts(
        self.native, scheme='ca', contacts=contact_pairs)[0]
    is_contact = (ca_distances < ca_cutoff_angstroms)[0]
    contacts = contact_pairs[is_contact]

    side1 = set(contacts[:, 0])
    side2 = set(contacts[:, 1])
    interface_residues = side1 | side2
    if verbose:
        # Single pre-formatted argument: prints the same on Python 2 and 3.
        print('%i interface residues (%i,%i)' % (len(interface_residues),
                                                 len(side1), len(side2)))

    # Set membership keeps the atom scan linear in the number of atoms.
    self.interface_atom_indices = np.array([
        a.index for a in native.topology.atoms
        if a.residue.index in interface_residues
    ])
def get_interface_contacts(frame, ca_cutoff_ang=10.):
    """
    Identify the residues at the interface between the receptor chains and
    the ligand chains, using mdtraj.

    A receptor residue and a ligand residue are in contact when their
    c-alpha distance (multiplied by 10) is below ``ca_cutoff_ang``, preset to
    10 angstroms.  Feeds into contacts_iterator.

    The sorted residue indices are stored in ``self.interface_residues`` and
    returned.
    """
    # NOTE(review): `self` is not a parameter of this function; it has to
    # come from an enclosing scope - confirm where this definition lives.

    # Residue indices of the receptor and of the ligand
    r_residues = [residue.index
                  for chain in self.receptor_chains
                  for residue in frame.topology.chain(chain).residues]
    l_residues = [residue.index
                  for chain in self.ligand_chains
                  for residue in frame.topology.chain(chain).residues]

    # Every receptor residue paired with every ligand residue
    contact_pairs = np.array([(i, j) for i in r_residues for j in l_residues])

    # C-alpha distances of the first frame, compared with the cutoff
    ca_distances = 10. * md.compute_contacts(frame, scheme='ca', contacts=contact_pairs)[0]
    is_contact = (ca_distances < ca_cutoff_ang)[0]

    # Keep the pairs in contact and reduce them to the unique residues involved
    contacts = contact_pairs[is_contact]
    receptor_side = set(contacts[:, 0])
    ligand_side = set(contacts[:, 1])
    self.interface_residues = sorted(receptor_side.union(ligand_side))
    return self.interface_residues
def __init__(self, struct):
    """Store the structure, its residue contacts and its sequence.

    ``struct`` is expected to be a single protein chain (no hetatm, no
    water).
    """
    self.struct = struct

    # Residue-residue distances with mdtraj's default settings; a cutoff is
    # applied later.  `dd` receives the distances, `rp` the residue pairs.
    contact_result = mdtraj.compute_contacts(self.struct)
    self.dd, self.rp = contact_result

    # Sequence of the resolved residues (may be shorter than the database
    # FASTA sequence).
    residue_codes = (r.code for r in self.struct.topology.residues)
    self.seq = ''.join(residue_codes)
def maker_w_strings(data):
    """Build a 64x64x2 image (contact matrix + protein/ligand mask) for one state.

    Parameters
    ----------
    data : tuple
        ``(i, sdf_state, num_residues, pdb)``.  The first element is not
        used.  ``sdf_state`` and ``pdb`` are passed to ``create_pdb``;
        ``num_residues`` is the size of the trailing block of residues that
        is marked as protein.

    Returns
    -------
    np.ndarray
        Array of shape (64, 64, 2): channel 0 is the resized
        closest-heavy-atom distance matrix, channel 1 the resized mask
        (-1 in the leading ligand block, +1 in the trailing protein block).
    """
    _, sdf_state, num_residues, pdb = data

    tmp_file = str(id(multiprocessing.current_process()))
    tmp_path = "tmp{0}.pdb".format(tmp_file)
    try:
        create_pdb(sdf_state, pdb, tmp_file)  # create the pdb for one state
        # Load the file into mdtraj
        t = md.load(tmp_path)
    finally:
        # Remove the temporary file even when writing or loading fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    pl = len(list(t.topology.residues))
    resi = np.arange(pl)
    pairs = list(itertools.product(resi, resi))

    # compute contacts; one frame, so the distances reshape to (pl, pl)
    matrix, _ = md.compute_contacts(t, contacts=pairs, scheme="closest-heavy")
    matrix = np.array(matrix).reshape((pl, pl))

    # protein-ligand mask; clamping keeps the result of the original
    # element-wise loops when num_residues exceeds pl
    n_ligand = max(pl - num_residues, 0)
    mask = np.zeros((pl, pl))
    mask[:n_ligand, :n_ligand] = -1  # ligand
    mask[n_ligand:, n_ligand:] = 1   # protein

    p_img = cv2.resize(matrix, dsize=(64, 64),
                       interpolation=cv2.INTER_CUBIC).reshape(64, 64, 1)
    m_img = cv2.resize(mask, dsize=(64, 64),
                       interpolation=cv2.INTER_CUBIC).reshape(64, 64, 1)
    return np.concatenate([p_img, m_img], axis=-1)
def get_dists(filename):
    """Return residue-type labels and distances for all peptide/binding-site pairs.

    The peptide is taken to be the last residues of the structure (as many
    as there are CA atoms in chain 2) and the binding site the first 180
    residues.  Distances come from ``md.compute_contacts`` (first frame) and
    are multiplied by 10.

    Returns
    -------
    (list of str, list)
        Labels of the form "AAA-BBB" (first three characters of each
        residue's string form) and the matching distances.
    """
    conf = md.load(filename)

    n_peptide = len(conf.top.select("name == CA and chainid == 2"))
    n_binding_site = 180
    n_total = len(conf.top.select("name == CA"))

    peptide_indices = range(n_total)[-n_peptide:]
    binding_site_indices = range(n_binding_site)
    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    distances, pairs = md.compute_contacts(conf, contacts_all)
    frame_distances = distances[0]  # only a single frame

    residues = list(conf.top.residues)
    interactions = [
        str(residues[p[0]])[:3] + "-" + str(residues[p[1]])[:3]
        for p in pairs
    ]
    resres_dists = [frame_distances[i] * 10 for i in range(len(pairs))]
    return interactions, resres_dists
def featurize_sig(filename):
    """Build a sigmoid-weighted interaction-count feature vector for one structure.

    Every peptide/binding-site residue pair adds ``1 / (1 + exp(d - 5))`` to
    the entry of its residue-type pair, where ``d`` is the pair distance from
    ``md.compute_contacts`` multiplied by 10.  The vector length ``num_dim``
    and the mapping ``new_interaction_to_index`` are module-level names.

    The peptide is taken to be the last residues of the structure (as many
    as there are CA atoms in chain 2) and the binding site the first 180
    residues.
    """
    conf = md.load(filename)

    n_peptide = len(conf.top.select("name == CA and chainid == 2"))
    n_binding_site = 180
    n_total = len(conf.top.select("name == CA"))

    peptide_indices = range(n_total)[-n_peptide:]
    binding_site_indices = range(n_binding_site)
    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    distances, pairs = md.compute_contacts(conf, contacts_all)
    distances = distances[0]  # only a single frame

    feature_vec = np.zeros((num_dim, ))
    residues = list(conf.top.residues)
    alpha = 5  # midpoint of the sigmoid (alternative noted originally: np.log(99)+4)

    for pair_idx, p in enumerate(pairs):
        r1 = str(residues[p[0]])[:3]
        r2 = str(residues[p[1]])[:3]
        raw_distance = distances[pair_idx]
        resres_dist = raw_distance * 10
        slot = new_interaction_to_index[r1 + "-" + r2]
        feature_vec[slot] += 1. / (1 + np.exp(resres_dist - alpha))
        # Report (near-)zero distances
        if abs(raw_distance) < 0.001:
            print(r1, r2, raw_distance, p)

    return feature_vec
def get_distances(filename):
    """Return residue names and distances for all peptide/binding-site pairs.

    The peptide is taken to be the last residues of the structure (as many
    as there are CA atoms in chain 2) and the binding site the first 180
    residues.  Distances come from ``md.compute_contacts`` (first frame) and
    are multiplied by 10.

    Returns
    -------
    (list of [str, str], list)
        The string form of both residues of each pair and the matching
        distances.
    """
    conf = md.load(filename)

    num_peptide_residues = len(conf.top.select("name == CA and chainid == 2"))
    num_binding_site_residues = 180
    num_total_residues = len(conf.top.select("name == CA"))

    peptide_indices = range(num_total_residues)[-num_peptide_residues:]
    binding_site_indices = range(num_binding_site_residues)
    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    distances, pairs = md.compute_contacts(conf, contacts_all)
    distances = distances[0]  # only a single frame

    # The original also allocated an unused feature vector of length
    # `num_dim`; it is dropped so this function no longer needs that global.
    residues = list(conf.top.residues)
    resres_names = [[str(residues[p[0]]), str(residues[p[1]])] for p in pairs]
    pep_mhc_distances = [distances[i] * 10 for i in range(len(pairs))]
    return resres_names, pep_mhc_distances
def compute_contacts_below_cutoff(traj_file_frame, cutoff = 100000.0, contact_residues = [], anton = False):
    """Find the residue pairs whose closest-heavy-atom distance is below ``cutoff``.

    Parameters
    ----------
    traj_file_frame : sequence
        Element 0 is the trajectory file; its first frame is analysed.
    cutoff : float
        Distance threshold, compared directly with the mdtraj distances.
    contact_residues : sequence of (chain id, resSeq)
        Residues to consider.  Entries must be hashable (they are used as
        dictionary keys).  The argument is only read, never modified.
    anton : bool
        Not used by this function.

    Returns
    -------
    (list, list)
        ``final_resSeq_pairs`` - the pairs of ``contact_residues`` entries
        closer than ``cutoff``; and ``residue_full_infos`` - for each such
        pair two one-element lists of ``(resSeq, name, chain index)``.
        Both are empty when fewer than two residues are found.
    """
    traj_file = traj_file_frame[0]
    frame = md.load_frame(traj_file, index = 0)
    top = frame.topology

    distance_residues = []
    resSeq_to_resIndex = {}
    for residue in contact_residues:
        indices = [r.index for r in top.residues
                   if r.resSeq == residue[1] and r.chainid == residue[0] and not r.is_water]
        if len(indices) == 0:
            # `residue` is a tuple, so "%d" % residue raised TypeError.
            print("No residues in trajectory for residue %s" % (residue,))
            continue

        ind = indices[0]
        for j in indices:
            if j != ind:
                # The original comprehension had its clauses reversed and
                # used `r` before binding it.
                # NOTE(review): str(a) is presumably never exactly "CB", so
                # this switch likely never fires - `a.name` may have been
                # intended; confirm before changing.
                if "CB" in [str(a) for r in top.residues if r.index == ind for a in r.atoms]:
                    ind = j
        distance_residues.append(residue)
        resSeq_to_resIndex[residue] = ind

    resSeq_pairs = list(itertools.combinations(distance_residues, 2))
    res_index_combinations = [
        (resSeq_to_resIndex[res0], resSeq_to_resIndex[res1])
        for res0, res1 in resSeq_pairs
    ]
    if not res_index_combinations:
        # Nothing to measure; do not hand an empty pair list to mdtraj.
        return ([], [])

    distances = md.compute_contacts(frame, contacts = res_index_combinations, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
    print(np.shape(distances))

    final_resSeq_pairs = []
    final_resIndex_pairs = []
    for i, distance in enumerate(distances[0]):
        if distance < cutoff:
            final_resIndex_pairs.append(res_index_combinations[i])
            final_resSeq_pairs.append(resSeq_pairs[i])

    def _residue_info(res_index):
        # One-element list, matching the shape the original filter produced.
        r = top.residue(res_index)
        return [(r.resSeq, r.name, r.chain.index)]

    residue_full_infos = [(_residue_info(pair[0]), _residue_info(pair[1]))
                          for pair in final_resIndex_pairs]

    print(len(final_resSeq_pairs))
    print(len(final_resIndex_pairs))
    return((final_resSeq_pairs, residue_full_infos))
def partial_transform(self, traj):
    """Featurize an MD trajectory into a vector space derived from
    residue-residue distances

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, dtype=float, shape=(n_samples, n_features)
        A featurized trajectory is a 2D array of shape
        `(length_of_trajectory x n_features)` where each `features[i]`
        vector is computed by applying the featurization function
        to the `i`th snapshot of the input trajectory.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    # Element 0 of the result holds the distances; the residue pairs in
    # element 1 are not needed here.
    contact_result = md.compute_contacts(traj, self.contacts, self.scheme,
                                         self.ignore_nonprotein)
    return self._transform(contact_result[0])
def igeom2mindist_COMdist_truncation(igeom,
                                     res_COM_cutoff_Ang=25,
                                     ):
    """Minimum residue-residue distances, pre-filtered by center-of-mass distance.

    Residue pairs are kept only if ``COM_n_from_COM_dist_triu`` flags them
    for at least one frame at the given cutoff; the distances of the kept
    pairs are then computed with mdtraj and minimised over frames.

    Parameters
    ----------
    igeom : mdtraj.Trajectory
    res_COM_cutoff_Ang : float
        Center-of-mass cutoff in Angstrom; divided by 10 before being
        passed on as ``cutoff_nm``.

    Returns
    -------
    (np.ndarray, np.ndarray, np.ndarray)
        The per-pair minimum over frames of the contact distances, the
        residue pairs that were kept, and the indices of those pairs within
        the upper-triangle pair list.

    Raises
    ------
    MemoryError
        Re-raised after printing how many contacts and frames were requested.
    """
    COMs_xyz = geom2COMxyz(igeom)
    COMs_dist_triu = _np.array([pdist(ixyz) for ixyz in COMs_xyz])

    # 10.0 keeps this a true division even for the integer default.
    COMs_under_cutoff = COM_n_from_COM_dist_triu(COMs_dist_triu,
                                                 cutoff_nm=res_COM_cutoff_Ang / 10.0)

    # flatnonzero always returns a 1-D index array; argwhere(...).squeeze()
    # collapsed to a 0-d scalar when exactly one pair survived, which turned
    # `pairs` into a single 1-D row.
    COMs_under_cutoff_pair_idxs = _np.flatnonzero(COMs_under_cutoff.sum(0) >= 1)
    pairs = _np.vstack(_np.triu_indices(igeom.n_residues, 1)).T[COMs_under_cutoff_pair_idxs]

    try:
        ctcs, ctc_idxs_dummy = _md.compute_contacts(igeom, pairs)
    except MemoryError:
        print("\nCould not fit %u contacts for %u frames into memory"%(len(pairs), igeom.n_frames))
        raise

    # mdtraj must hand back exactly the pairs that were requested
    assert _np.allclose(pairs, ctc_idxs_dummy)
    return ctcs.min(0), pairs, COMs_under_cutoff_pair_idxs
def describe_features(self, traj):
    """Return a list of dictionaries describing the features in Contacts.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory whose topology is used to describe the features.

    Returns
    -------
    list of dict
        One dictionary per residue pair with the keys ``resname``,
        ``atomind`` (always "N/A"), ``resSeq``, ``resid``, ``otherInfo``
        (the ``ignore_nonprotein`` setting), ``bigclass`` (``self.contacts``)
        and ``smallclass`` (the scheme).
    """
    # Only the residue pairs are needed and they do not depend on the frame,
    # so compute the contacts for the first frame only.
    _, residue_indices = md.compute_contacts(traj[0], self.contacts,
                                             self.scheme,
                                             self.ignore_nonprotein)
    top = traj.top

    x = []
    for pair in residue_indices:
        resid = np.array([top.residue(j).index for j in pair])
        x.append(dict(
            resname=[top.residue(j).name for j in resid],
            atomind="N/A",
            resSeq=np.array([top.residue(j).resSeq for j in pair]),
            resid=resid,
            otherInfo=self.ignore_nonprotein,
            bigclass=self.contacts,
            smallclass=self.scheme,
        ))
    return x
def _Compute_Contacts_Between_Residues(self, Pairs=None, Cutoff=0.5, Trajectory=None):
    """Count, for every residue pair, the frames in which it is below ``Cutoff``.

    Returns
    -------
    (list, list)
        ``residues_in_contact`` - ``[residue, residue]`` entries (looked up
        in ``self.top``) for every distinct pair that is below the cutoff in
        at least one frame; and ``contacts`` - one count per entry of
        ``Pairs`` with the number of frames below the cutoff.
    """
    output = mdtraj.compute_contacts(Trajectory, Pairs)
    # After transposing, each row holds one pair's distance in every frame.
    per_pair_distances = output[0].T
    residue_pairs = output[1]

    residues_in_contact = []
    contacts = []
    for pair_idx, pair_distances in enumerate(per_pair_distances):
        frames_in_contact = 0
        for d in pair_distances:
            if not d < Cutoff:
                continue
            pair = residue_pairs[pair_idx]
            entry = [self.top.residue(pair[0]), self.top.residue(pair[1])]
            if entry not in residues_in_contact:
                residues_in_contact.append(entry)
            frames_in_contact += 1
        contacts.append(frames_in_contact)

    return (residues_in_contact, contacts)
def partial_transform(self, traj):
    """Featurize an MD trajectory into a vector space derived from
    residue-residue distances

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, dtype=float, shape=(n_samples, n_features)
        A featurized trajectory is a 2D array of shape
        `(length_of_trajectory x n_features)` where each `features[i]`
        vector is computed by applying the featurization function
        to the `i`th snapshot of the input trajectory.

    Warns
    -----
    UserWarning
        If the topology of `traj` differs from that of the reference frame,
        or the two cannot be compared.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    # Check that the topology is consistent with the reference frame.  An
    # explicit comparison is used because an `assert` is skipped under
    # `python -O`.  Any failure to compare counts as a mismatch, as it did
    # with the original bare except.
    try:
        same_topology = bool(traj.top == self.reference_frame.top)
    except Exception:
        same_topology = False
    if not same_topology:
        warnings.warn("The topology of the trajectory is not "
                      "the same as that of the reference frame, "
                      "which might give meaningless results.")

    distances, _ = md.compute_contacts(traj, self.contacts,
                                       self.scheme, ignore_nonprotein=False)
    return self._transform(distances)
def test_contact_4(get_fn):
    """'sidechain-heavy' contacts must work for a structure that contains glycine."""
    # Protonated structure with at least one glycine residue (which has no
    # heavy atoms in its sidechain)
    structure_file = get_fn('1am7_protein.pdb')
    pdb = md.load(structure_file)

    # The test passes if this call does not raise an exception
    md.compute_contacts(pdb, contacts='all', scheme='sidechain-heavy')
def test_Residue_Mindist_Ca_all(self):
    """C-alpha residue min-distances for all pairs must match mdtraj."""
    n_atoms = self.feat.topology.n_atoms  # value is not used further
    self.feat.add_residue_mindist(scheme='ca')
    D = self.feat.transform(self.traj)

    Dref, _ = mdtraj.compute_contacts(self.traj, scheme='ca')
    assert np.allclose(D, Dref)

    n_described = len(self.feat.describe())
    assert n_described == self.feat.dimension()
def test_Residue_Mindist_Ca_array(self):
    """C-alpha residue min-distances for explicit pairs must match mdtraj."""
    contacts = np.array([[20, 10], [10, 0]])
    self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
    D = self.feat.transform(self.traj)

    Dref, _ = mdtraj.compute_contacts(self.traj, scheme='ca', contacts=contacts)
    assert np.allclose(D, Dref)

    n_described = len(self.feat.describe())
    assert n_described == self.feat.dimension()
def partial_transform(self, traj):
    """Featurize an MD trajectory into a vector space derived from
    residue-residue distances

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, dtype=float, shape=(n_samples, n_features)
        A featurized trajectory is a 2D array of shape
        `(length_of_trajectory x n_features)` where each `features[i]`
        vector is computed by applying the featurization function
        to the `i`th snapshot of the input trajectory.

    Warns
    -----
    UserWarning
        If the topology of `traj` differs from that of the reference frame,
        or the two cannot be compared.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    # Check that the topology is consistent with the reference frame.  An
    # explicit comparison is used because an `assert` is skipped under
    # `python -O`.  Any failure to compare counts as a mismatch, as it did
    # with the original bare except.
    try:
        same_topology = bool(traj.top == self.reference_frame.top)
    except Exception:
        same_topology = False
    if not same_topology:
        warnings.warn("The topology of the trajectory is not "
                      "the same as that of the reference frame, "
                      "which might give meaningless results.")

    distances, _ = md.compute_contacts(traj, self.contacts,
                                       self.scheme, ignore_nonprotein=False,
                                       periodic = self.periodic)
    return self._transform(distances)
def describe_features(self, traj):
    """Return a list of dictionaries describing the features in Contacts.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory whose topology is used to describe the features.

    Returns
    -------
    list of dict
        One dictionary per residue pair with the keys ``resname``,
        ``atomind`` (always "N/A"), ``resSeq``, ``resid``, ``otherInfo``
        (the ``ignore_nonprotein`` setting), ``bigclass`` (``self.contacts``)
        and ``smallclass`` (the scheme).
    """
    # The residue pairs are the same for every frame and the distances are
    # not used, so the first frame is all that has to be evaluated.
    first_frame = traj[0]
    residue_indices = md.compute_contacts(
        first_frame, self.contacts, self.scheme, self.ignore_nonprotein)[1]

    topology = traj.top
    resSeq = [
        np.array([topology.residue(j).resSeq for j in pair])
        for pair in residue_indices
    ]
    resid = [
        np.array([topology.residue(j).index for j in pair])
        for pair in residue_indices
    ]
    resnames = [[topology.residue(j).name for j in pair] for pair in resid]

    return [
        dict(resname=names,
             atomind="N/A",
             resSeq=seqs,
             resid=ids,
             otherInfo=self.ignore_nonprotein,
             bigclass=self.contacts,
             smallclass=self.scheme)
        for names, seqs, ids in zip(resnames, resSeq, resid)
    ]
def compute_mdtraj_order_parmeters(trajectory_file, rmsd_reference_structure=None):
    """Compute a set of mdtraj order parameters for a trajectory file.

    See http://mdtraj.org/1.8.0/analysis.html for the underlying functions.

    Parameters
    ----------
    trajectory_file : str
        Trajectory file readable by ``md.load``.
    rmsd_reference_structure : str or None
        Reference structure file; when given, the RMSD to it is included.

    Returns
    -------
    dict
        ``"RMSD"`` (only with a reference), ``"HBondEnergy"`` (per-frame sum
        of the ``md.kabsch_sander`` result), ``"SecondaryStructure"`` (DSSP
        codes as integers, transposed), ``"Rg"`` and ``"Contacts"``
        (C-alpha contact maps in square form).
    """
    trajectory = md.load(trajectory_file)
    order_parameters = {}

    if rmsd_reference_structure is not None:
        reference = md.load(rmsd_reference_structure)
        order_parameters["RMSD"] = md.rmsd(trajectory, reference)

    order_parameters["HBondEnergy"] = np.array(
        [np.sum(x) for x in md.kabsch_sander(trajectory)])

    # Map each DSSP code to an integer.  np.unique sorts the codes, so the
    # mapping is the same on every run; building it from set() iteration
    # order made it vary between interpreter runs.
    ss = md.compute_dssp(trajectory)
    _, ss_codes = np.unique(ss, return_inverse=True)
    order_parameters["SecondaryStructure"] = ss_codes.reshape(ss.shape).T

    order_parameters["Rg"] = md.compute_rg(trajectory)

    distances, residue_pairs = md.compute_contacts(trajectory, scheme='ca')
    order_parameters["Contacts"] = md.geometry.squareform(distances, residue_pairs)

    return order_parameters
def contacts_bonds(traj, peptide_chain ):
    """Closest-heavy-atom distances between the peptide chain and chains 0-2.

    Parameters
    ----------
    traj : mdtraj.Trajectory
    peptide_chain : int
        Index of the peptide chain in ``traj.topology``.

    Returns
    -------
    tuple
        The ``(distances, residue_pairs)`` result of
        ``mdtraj.compute_contacts``.  (The original computed it but returned
        nothing.)
    """
    topology = traj.topology
    group_1 = [residue.index
               for residue in topology.chain(peptide_chain).residues]

    # The original chained the three residue iterators with `or`; an
    # iterator is always truthy, so only chain 0 was ever used.  Chains that
    # do not exist are skipped.
    # NOTE(review): if `peptide_chain` is itself 0, 1 or 2 the peptide is
    # paired with itself - confirm whether it should be excluded.
    group_2 = [residue.index
               for chain_index in range(min(3, topology.n_chains))
               for residue in topology.chain(chain_index).residues]

    pairs = list(product(group_1, group_2))
    result = mdtraj.compute_contacts(traj, pairs, scheme='closest-heavy',
                                     ignore_nonprotein=True, periodic=True,
                                     soft_min=False, soft_min_beta=20)
    return result
def plot_native_state_contact_map(title):
    """Plot the contact-probability map and save it as a PDF.

    Contact pairs and probabilities are read from "contact_pairs.dat" and
    "contact_probabilities.dat" when both exist.  Otherwise they are
    computed from "BeadBead.dat", "traj.xtc" and "Native.pdb" and written to
    those two files.  The figure is saved as
    "native_state_contact_map.pdf".

    Parameters
    ----------
    title : str
        Title of the plot.
    """
    # Jet colormap with white as its lowest colour.
    colors = [('white')] + [(cm.jet(i)) for i in range(1,256)]
    new_map = matplotlib.colors.LinearSegmentedColormap.from_list('new_map', colors, N=256)

    if os.path.exists("contact_pairs.dat") and os.path.exists("contact_probabilities.dat"):
        # savetxt stored the pairs as floats; they are used as array indices
        # below, so cast them back (and keep the 2-D/1-D shapes even when the
        # files hold a single pair).
        pairs = np.loadtxt("contact_pairs.dat").astype(int).reshape(-1, 2)
        probability = np.atleast_1d(np.loadtxt("contact_probabilities.dat"))
    else:
        print(" Loading BeadBead.dat")
        beadbead = np.loadtxt("BeadBead.dat",dtype=str)
        sigij = beadbead[:,5].astype(float)
        pairs = beadbead[:,:2].astype(int)
        pairs -= 1  # the file is 1-based, mdtraj residue indices are 0-based
        np.savetxt("contact_pairs.dat",pairs)

        print(" Computing distances with mdtraj...")
        traj = md.load("traj.xtc",top="Native.pdb")
        distances = md.compute_contacts(traj,pairs)
        # A pair is in contact in a frame when its distance is within 1.2*sigma
        contacts = (distances[0][:] <= 1.2*sigij).astype(int)

        print(" Computing contact probability...")
        probability = contacts.astype(float).sum(axis=0)/contacts.shape[0]
        np.savetxt("contact_probabilities.dat",probability)

    Qref = np.loadtxt("Qref_cryst.dat")
    C = np.zeros(Qref.shape,float)
    for k in range(len(pairs)):
        C[pairs[k][0],pairs[k][1]] = probability[k]

    print(" Plotting...")
    plt.figure()
    plt.subplot(1,1,1,aspect=1)
    plt.pcolor(C,cmap=new_map)
    # Mirror every non-negligible contact as a marker
    for k in range(len(pairs)):
        if probability[k] > 0.01:
            plt.plot(pairs[k][1],pairs[k][0],marker='s',ms=3.0,markeredgecolor=new_map(probability[k]),color=new_map(probability[k]))
    plt.xlim(0,len(Qref))
    plt.ylim(0,len(Qref))
    ax = plt.gca()
    cbar = plt.colorbar()
    cbar.set_clim(0,1)
    cbar.set_label("Contact probability",fontsize=20)
    cbar.ax.tick_params(labelsize=20)
    plt.xlabel("Residue i",fontsize=20)
    plt.ylabel("Residue j",fontsize=20)
    plt.title(title)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontsize(15)

    print(" Saving...")
    plt.savefig("native_state_contact_map.pdf")
def ca_contact_pca(traj, n_pc, cutoff_angstroms=8., variance_scaled=True):
    """Project the per-frame C-alpha contact pattern onto its leading components.

    A residue pair counts as a contact in a frame when its C-alpha distance
    (mdtraj value multiplied by 10) is below ``cutoff_angstroms``.  The
    contact matrix is centred per pair and decomposed with a truncated SVD.

    Parameters
    ----------
    traj : mdtraj.Trajectory
    n_pc : int
        Number of components to keep.
    cutoff_angstroms : float
    variance_scaled : bool
        If true, each component is multiplied by its explained variance
        ratio.

    Returns
    -------
    np.ndarray
        The transformed (and optionally scaled) data.
    """
    from sklearn.decomposition import TruncatedSVD

    ca_distances = 10. * md.compute_contacts(traj, scheme='ca')[0]
    is_contact = ca_distances < cutoff_angstroms
    centered = is_contact - is_contact.mean(axis=0)

    svd = TruncatedSVD(n_pc).fit(centered)
    if variance_scaled:
        scale = svd.explained_variance_ratio_
    else:
        scale = 1.
    return svd.transform(centered) * scale
def compute_contacts(self, residue_pairs):
    """
    Compute the residue-residue contact distances for ``self.traj``.

    :param residue_pairs: An array containing pairs of indices (0-indexed) of residues to compute the contacts between
    :return: distances: np.ndarray, shape=(n_frames, n_pairs); residues_pairs: np.ndarray, shape=(n_pairs, 2)
    """
    contact_result = md.compute_contacts(self.traj, residue_pairs)
    return contact_result
def test_contact_3(get_fn):
    """Soft-min contact distances must equal the soft minimum of the square-form map."""
    pdb = md.load(get_fn('bpti.pdb'))
    beta = 20
    dists, pairs = md.compute_contacts(pdb, soft_min=True, soft_min_beta=beta)
    maps = md.geometry.squareform(dists, pairs)

    frames = range(pdb.n_frames)
    for pair_idx, (r0, r1) in enumerate(pairs):
        for t in frames:
            expected = beta / np.log(np.sum(np.exp(beta / maps[t, r0, r1])))
            assert np.allclose(expected, dists[t, pair_idx])
def _distances(traj: md.Trajectory, scheme: str, transform: str,
               centre: Union[float, None], steepness: Union[float, None]):
    """Residue-residue distances for all pairs, optionally logistic-transformed.

    With ``transform == 'logistic'`` the distances ``d`` are mapped to
    ``1 / (1 + exp(-steepness * (d - centre)))``; both ``centre`` and
    ``steepness`` must then be given.  Any other ``transform`` value returns
    the raw distances.
    """
    feat, ix = md.compute_contacts(traj, contacts='all', scheme=scheme)
    if transform != 'logistic':
        return feat

    assert (centre is not None) and (steepness is not None)
    exponent = (-1) * steepness * (feat - centre)
    squashed = 1.0 / (1. + np.exp(exponent))
    assert np.allclose(squashed.shape, feat.shape)
    return squashed
def _get_contact_pairs(self, contacts):
    """Build ``self.residue_pairs``, the protein-ligand residue pairs to featurize.

    Parameters
    ----------
    contacts : str or array-like of shape (n_pairs, 2)
        ``'all'`` pairs every residue of the protein chain with every
        residue of the ligand chain (shifted by ``self.p_residue_offset`` /
        ``self.l_residue_offset``); otherwise an explicit list of residue
        index pairs.

    Returns
    -------
    np.ndarray
        The residue pairs.  Unless ``self.binding_pocket`` is ``'all'`` they
        are restricted to pairs whose distance in the reference frame is
        below ``self.binding_pocket``.

    Raises
    ------
    ValueError
        If the 'ca' scheme is requested but the ligand has no alpha carbon,
        if ``contacts`` is a string other than 'all', if a residue index is
        out of range, or if no residue pair remains.
    """
    if self.scheme == 'ca':
        # was a bare `ligand_chain`, which is not defined in this method
        ligand_atoms = self.reference_frame.top.chain(self.ligand_chain).atoms
        if not any(a.name.lower() == 'ca' for a in ligand_atoms):
            raise ValueError("Bad scheme: the ligand has no alpha carbons")

    # this is really similar to mdtraj/contact.py, but ensures that
    # md.compute_contacts is always seeing an array of exactly the
    # contacts we want to specify
    if isinstance(contacts, string_types):
        if contacts.lower() != 'all':
            raise ValueError(
                '({}) is not a valid contacts specifier'.format(
                    contacts.lower()))

        top = self.reference_frame.top
        n_protein = top.chain(self.protein_chain).n_residues
        n_ligand = top.chain(self.ligand_chain).n_residues
        self.residue_pairs = np.array([
            (i + self.p_residue_offset, j + self.l_residue_offset)
            for i in np.arange(n_protein)
            for j in np.arange(n_ligand)
        ])

        if len(self.residue_pairs) == 0:
            raise ValueError('No acceptable residue pairs found')
    else:
        # `np.int` was an alias of the builtin `int` and has been removed
        # from recent numpy releases.
        self.residue_pairs = ensure_type(np.asarray(contacts),
                                         dtype=int, ndim=2,
                                         name='contacts',
                                         shape=(None, 2),
                                         warn_on_cast=False)
        in_range = ((self.residue_pairs >= 0) &
                    (self.residue_pairs < self.reference_frame.n_residues))
        if not np.all(in_range):
            raise ValueError('contacts requests a residue that is not '\
                             'in the permitted range')

    # Compare by value: `is not 'all'` depended on string interning.
    if self.binding_pocket != 'all':
        ref_distances, _ = md.compute_contacts(self.reference_frame,
                                               self.residue_pairs,
                                               self.scheme,
                                               ignore_nonprotein=False)
        self.residue_pairs = self.residue_pairs[np.where(
            ref_distances < self.binding_pocket)[1]]
        if len(self.residue_pairs) == 0:
            raise ValueError('No residue pairs within binding pocket')

    return self.residue_pairs
def test_Residue_Mindist_Ca_all_threshold(self):
    """Thresholded 'ca' residue min-distances must equal a binarised reference."""
    cutoff = .7
    self.feat.add_residue_mindist(scheme='ca', threshold=cutoff)
    observed = self.feat.transform(self.traj)

    # Reference: 1 where the CA contact distance is within the cutoff, else 0.
    reference, _ = mdtraj.compute_contacts(self.traj, scheme='ca')
    expected = (reference <= cutoff).astype(reference.dtype)

    assert np.allclose(observed, expected)
    assert len(self.feat.describe()) == self.feat.dimension()
def get_pocket_residues(traj):
    """Build a ``resid ... or resid ...`` selection of residues near the last residue.

    The trajectory is first reduced to the atoms matching
    ``"protein or resn UNL"``. Every other residue is then paired with the
    last residue of that reduced topology, and the pairs whose contact
    distance in the first frame is at most 0.5 are kept.

    Returns
    -------
    str
        The kept residue indices, each shifted by +1, joined as
        ``"resid A or resid B ..."`` (an empty string when nothing is close).
    """
    reduced = traj.atom_slice(traj.topology.select("protein or resn UNL"))
    last_index = sum(1 for _ in reduced.topology.residues) - 1

    # Pair every earlier residue with the final one.
    candidate_pairs = [(res_index, last_index) for res_index in range(last_index)]
    distances, _ = md.compute_contacts(reduced, candidate_pairs)

    nearby = np.where(distances[0] <= 0.5)[0] + 1
    return " or ".join("resid {}".format(res_id) for res_id in nearby)
def shukla_coords(trajectories,KER,Aloop,SRC2):
    """Compute per-trajectory loop RMSD and contact-distance difference.

    Parameters
    ----------
    trajectories : iterable of trajectories
    KER : sequence
        ``KER[0]`` and ``KER[1]`` are the two residue pairs handed to
        ``md.compute_contacts``.
    Aloop : sequence
        ``Aloop[0]``..``Aloop[1]`` is the ``resid`` range whose backbone is
        selected in both the trajectory and the reference.
    SRC2 : trajectory
        Reference structure for the RMSD.

    Returns
    -------
    list
        ``[rmsd, difference]``: two lists with one array per trajectory,
        each scaled by 10 (nm to angstrom, per the original comments).
    """
    rmsd = []
    difference = []
    for traj in trajectories:
        # distance of the second pair minus distance of the first pair
        first_pair, _ = md.compute_contacts(traj, [KER[0]])
        second_pair, _ = md.compute_contacts(traj, [KER[1]])
        difference.append(10*(second_pair - first_pair))  # 10x because mdtraj is naturally in nm

        # backbone RMSD of the selected loop against frame 0 of the reference
        loop_query = "backbone and (resid %s to %s)" %(Aloop[0],Aloop[1])
        reference_loop = SRC2.atom_slice(SRC2.top.select(loop_query))
        traj_loop = traj.atom_slice(traj.top.select(loop_query))
        rmsd.append(10*(md.rmsd(traj_loop,reference_loop,frame=0)))  # 10x because mdtraj is naturally in nm
    return [rmsd, difference]
def test_contact_1():
    """Default contacts on BPTI: valid, well-separated pairs and a consistent squareform."""
    pdb = md.load(get_fn('bpti.pdb'))
    dists, pairs = md.compute_contacts(pdb)

    for first, second in pairs:
        # are these valid residue indices?
        pdb.topology.residue(first)
        pdb.topology.residue(second)
        # no pair may be closer than three positions in sequence
        assert abs(first - second) >= 3

    # Every squareform entry must match the corresponding flat distance.
    maps = md.geometry.squareform(dists, pairs)
    for column, (first, second) in enumerate(pairs):
        for frame in range(pdb.n_frames):
            eq(maps[frame, first, second], dists[frame, column])
def test_ContactFeaturizer_describe_features():
    """Spot-check that described residue pairs reproduce the featurizer columns."""
    scheme = np.random.choice(['ca','closest','closest-heavy'])
    feat = ContactFeaturizer(scheme=scheme, ignore_nonprotein=True)

    chosen = trajectories[np.random.randint(len(trajectories))]
    features = feat.transform([chosen])
    described = pd.DataFrame(feat.describe_features(chosen))

    # 25 random features: recompute each from its described residue pair.
    for _ in range(25):
        column = np.random.choice(len(described))
        residue_pair = described.iloc[column].resids
        recomputed, _ = md.compute_contacts(chosen,
                                            contacts=[residue_pair],
                                            scheme=scheme)
        assert np.all(features[0][:, column] == recomputed.ravel())
def describe_features(self, traj):
    """Return a list of dictionaries describing the contact features.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        The trajectory to describe; only its first frame and its topology
        are used.

    Returns
    -------
    feature_descs : list of dict
        One entry per residue pair reported by ``md.compute_contacts``,
        built by ``dict_maker`` from the following information:
            - resnames: names of the residues in the pair
            - atominds: always "N/A" for contacts
            - resseqs: residue sequence ids (not necessarily
              0-indexed)
            - resids: residue indices as returned by mdtraj
            - featurizer: "Ligand Contact"
            - featuregroup: the contact scheme (ca, closest, ...)
    """
    # Only the residue pairs are needed; the distances are discarded.
    _, residue_indices = md.compute_contacts(traj[0],
                                             self.contacts, self.scheme,
                                             ignore_nonprotein=False,
                                             periodic=self.periodic)
    top = traj.topology

    aind = ["N/A"] * len(residue_indices)
    resseqs = [[top.residue(ri).resSeq for ri in pair]
               for pair in residue_indices]
    resnames = [[top.residue(ri).name for ri in pair]
                for pair in residue_indices]

    zippy = itertools.product(["Ligand Contact"], [self.scheme],
                              ["N/A"],
                              zip(aind, resseqs, residue_indices, resnames))

    return list(dict_maker(zippy))
def _get_contact_pairs(self, contacts):
    """Resolve ``contacts`` into residue pairs and store them on ``self``.

    Parameters
    ----------
    contacts : str or array-like
        ``'all'`` (case-insensitive) pairs every residue of chain
        ``self.protein_chain`` with every residue of chain
        ``self.ligand_chain`` of ``self.reference_frame``, shifting the
        indices by ``self.p_residue_offset`` / ``self.l_residue_offset``.
        Anything else is validated as an ``(n, 2)`` integer array of
        residue indices.

    Returns
    -------
    residue_pairs
        The selected pairs; also stored as ``self.residue_pairs``.

    Raises
    ------
    ValueError
        If the scheme is ``'ca'`` but the ligand chain has no atom named
        CA, if ``contacts`` is a string other than ``'all'``, if no pairs
        result, if an index is outside ``[0, n_residues)``, or if no pair
        lies within the binding pocket.
    """
    if self.scheme == 'ca':
        ligand_atoms = self.reference_frame.top.chain(self.ligand_chain).atoms
        if not any(a.name.lower() == 'ca' for a in ligand_atoms):
            raise ValueError("Bad scheme: the ligand has no alpha carbons")

    # this is really similar to mdtraj/contact.py, but ensures that
    # md.compute_contacts is always seeing an array of exactly the
    # contacts we want to specify
    if isinstance(contacts, string_types):
        if contacts.lower() != 'all':
            raise ValueError('({}) is not a valid contacts specifier'.format(contacts.lower()))

        top = self.reference_frame.top
        n_protein = top.chain(self.protein_chain).n_residues
        n_ligand = top.chain(self.ligand_chain).n_residues
        self.residue_pairs = np.array(
            [(i + self.p_residue_offset, j + self.l_residue_offset)
             for i in range(n_protein)
             for j in range(n_ligand)])

        if len(self.residue_pairs) == 0:
            raise ValueError('No acceptable residue pairs found')

    else:
        # ``np.int`` was only an alias of the builtin and has been removed
        # from NumPy; the builtin ``int`` is the same dtype.
        self.residue_pairs = ensure_type(np.asarray(contacts),
                                         dtype=int, ndim=2,
                                         name='contacts',
                                         shape=(None, 2),
                                         warn_on_cast=False)
        in_range = ((self.residue_pairs >= 0) &
                    (self.residue_pairs < self.reference_frame.n_residues))
        if not np.all(in_range):
            raise ValueError('contacts requests a residue that is not '
                             'in the permitted range')

    # Compare by value: ``is not 'all'`` tests object identity, which is
    # not guaranteed for equal strings.
    if self.binding_pocket != 'all':
        ref_distances, _ = md.compute_contacts(self.reference_frame,
                                               self.residue_pairs,
                                               self.scheme,
                                               ignore_nonprotein=False,
                                               periodic=self.periodic)
        # Keep the pairs (columns) whose reference distance is below the
        # binding-pocket cutoff.
        self.residue_pairs = self.residue_pairs[np.where(
            ref_distances < self.binding_pocket)[1]]
        if len(self.residue_pairs) == 0:
            raise ValueError('No residue pairs within binding pocket')

    return self.residue_pairs
def test_contact_2():
    """'closest' contacts: no water pairs, min atom distance, consistent squareform."""
    pdb = md.load(get_fn('1vii_sustiva_water.pdb'))
    dists, pairs = md.compute_contacts(pdb, scheme='closest')

    # Water residues must never appear in the default pair list.
    for first, second in pairs:
        assert pdb.topology.residue(first).name != 'HOH'
        assert pdb.topology.residue(second).name != 'HOH'

    # spot check one of the pairs
    spot = 10
    first, second = pairs[spot]
    first_atoms = [a.index for a in pdb.topology.residue(first).atoms]
    second_atoms = [a.index for a in pdb.topology.residue(second).atoms]
    atom_pairs = list(itertools.product(first_atoms, second_atoms))
    atomdist = md.compute_distances(pdb, atom_pairs)
    np.testing.assert_array_equal(dists[:, spot], np.min(atomdist, axis=1))

    # Every squareform entry must match the corresponding flat distance.
    maps = md.geometry.squareform(dists, pairs)
    for column, (first, second) in enumerate(pairs):
        for frame in range(pdb.n_frames):
            eq(maps[frame, first, second], dists[frame, column])
def find_respairs_that_changed(fnames,
                               scheme = 'ca', # or 'closest' or 'closest-heavy'
                               threshold = 0.4,
                               stride = 100,
                               max_respairs = 1000):
    '''
    Find residue pairs whose contact distance crosses `threshold`.

    A pair counts as "changed" when, over all strided frames of all
    trajectories, its distance is above the threshold at least once and
    below it at least once.

    Parameters
    ----------
    fnames : list of paths to trajectories
    scheme : 'ca' or 'closest' or 'closest-heavy'
    threshold : float
        contact threshold (nm)
    stride : int
        only every `stride`th frame of each trajectory is loaded
    max_respairs : int
        upper bound on the number of pairs returned; when more pairs
        changed, the ones that spend the most frames on their
        less-visited side of the threshold are kept

    Returns
    -------
    respairs_that_changed : array of residue pairs, taken from the
        pair list of the last trajectory processed

    Raises
    ------
    ValueError
        if `fnames` yields no trajectories
    '''
    distances = []
    residue_pairs = None
    for fname in fnames:
        traj = md.load(fname,stride=stride)
        pairwise_distances,residue_pairs = md.compute_contacts(traj,scheme=scheme)
        distances.append(pairwise_distances)
    if not distances:
        raise ValueError("fnames must contain at least one trajectory")
    distances = np.vstack(distances)

    # identify contacts that change by counting how many times the distances were
    # greater than and less than the threshold
    num_times_greater_than = (distances>threshold).sum(0)
    num_times_less_than = (distances<threshold).sum(0)
    changed = (num_times_greater_than > 0) & (num_times_less_than > 0)
    print("Number of contacts that changed: {0}".format(changed.sum()))
    print("Total number of possible contacts: {0}".format(len(residue_pairs)))

    # Cap on the number of *changed* pairs. The mask has one entry per
    # possible pair, so testing its length could trigger the cap with few
    # changed pairs and pad the result with pairs that never changed.
    if changed.sum() > max_respairs:
        n_diff = np.minimum(num_times_less_than, num_times_greater_than)
        # stable sort keeps the original pair order among ties
        changed = np.argsort(-n_diff, kind='stable')[:max_respairs]

    # turn the mask (or index list) into the relevant residue pairs
    return residue_pairs[changed]
def prepare_trajectory(self, trajectory):
    """Turn a trajectory into per-frame residue-residue contact distances.

    The contacts and scheme configured on this object (``self.contacts``,
    ``self.scheme``) are forwarded to ``md.compute_contacts``. Depending on
    which contacts were picked, this can be a 'native biased' picture or
    not.

    Parameters
    ----------
    trajectory : mdtraj.Trajectory
        The trajectory to prepare

    Returns
    -------
    pairwise_distances : ndarray
        The distances returned by ``md.compute_contacts`` for the
        configured residue pairs
    """
    # md.compute_contacts returns (distances, residue_pairs); only the
    # distances are needed here.
    pairwise_distances, _ = md.compute_contacts(trajectory, self.contacts,
                                                self.scheme)
    return pairwise_distances
def get_distances(fname, scheme, stride):
    '''
    Load one strided trajectory and compute its contact distances.

    Kept as a plain module-level function so it can be handed to a
    multiprocessing Pool.

    Parameters
    ----------
    fname : string
        filename of trajectory
    scheme : string
        'ca' or 'closest' or 'closest-heavy'
    stride : int
        thinning factor: only look at every `stride`th frame

    Returns
    -------
    pairwise_distances : numpy array
    residue_pairs : residue index pairs, as returned by md.compute_contacts
    '''
    thinned = md.load(fname, stride=stride)
    dists, respairs = md.compute_contacts(thinned, scheme=scheme)
    return dists, respairs
def read_and_featurize_iter(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, contact_residues = None):
    """Featurize one trajectory file and dump the features to HDF5.

    Dihedral features (sin and cos of each requested angle) are computed
    from the whole trajectory; contact features ('closest-heavy' distances
    between all pairs of the requested residues) are computed chunk by
    chunk with ``md.iterload`` and concatenated. When both are requested
    the columns are stacked, dihedrals first.

    Parameters
    ----------
    traj_file : str
        Path of the trajectory to featurize.
    features_dir : str
        Directory receiving ``<condition>.h5``.
    condition : str or None
        Output base name; derived with ``get_condition(traj_file)`` when None.
    dihedral_types : list of str
        Any of "phi", "psi", "chi1", "chi2"; other entries are ignored.
    dihedral_residues, contact_residues : sequence or None
        Residues to use for each feature family; None means none.

    Raises
    ------
    ValueError
        If neither dihedral nor contact residues are given.
    """
    # None defaults used to crash on len(); treat them as "nothing requested".
    if dihedral_residues is None:
        dihedral_residues = []
    if contact_residues is None:
        contact_residues = []
    use_dihedrals = len(dihedral_residues) > 0
    use_contacts = len(contact_residues) > 0
    if not (use_dihedrals or use_contacts):
        raise ValueError("at least one of dihedral_residues or "
                         "contact_residues must be non-empty")

    if use_dihedrals:
        # `top` and `traj` were never defined in this function; load them
        # the same way read_and_featurize_custom does.
        # NOTE(review): this loads the full trajectory for the dihedral
        # part -- confirm that is acceptable for the files this is used on.
        top = md.load_frame(traj_file, index=0).topology
        traj = md.load(traj_file)

        index_builders = {"phi": phi_indices, "psi": psi_indices,
                          "chi1": chi1_indices, "chi2": chi2_indices}
        dihedral_indices = [
            index_builders[dihedral_type](fix_topology(top), dihedral_residues)
            for dihedral_type in dihedral_types
            if dihedral_type in index_builders]

        dihedral_angles = []
        for indices in dihedral_indices:
            angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj, indices=indices))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))
        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if use_contacts:
        contact_features = []
        for chunk in md.iterload(traj_file, chunk = 10000):
            fixed_traj = fix_traj(chunk)
            residues = list(fixed_traj.topology.residues)

            # Map each requested resSeq to topology indices, skipping
            # residues with 5 atoms or fewer.
            distance_residues = [res.index
                                 for r in contact_residues
                                 for res in residues
                                 if res.resSeq == r and len(res._atoms) > 5]
            if len(contact_residues) != len(distance_residues):
                print("Residues are missing")
                print(len(contact_residues))
                print(len(distance_residues))

            pairs = list(itertools.combinations(distance_residues, 2))
            contact_features.append(md.compute_contacts(fixed_traj, contacts = pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0])
        contact_features = np.concatenate(contact_features)

        if use_dihedrals:
            manual_features = np.column_stack((manual_features, contact_features))
        else:
            manual_features = contact_features

    print("new features %s has shape: " %traj_file)
    print(np.shape(manual_features))

    if condition is None:
        condition = get_condition(traj_file)

    verbosedump(manual_features, "%s/%s.h5" %(features_dir, condition))
def featurize(path_to_files,model_name):
    """Select changing CA contacts, featurize all trajectories, and run tICA.

    Steps, as performed below:
      1. On a thinned subset (every 5th file, every 100th frame) find the
         residue pairs whose 'ca' contact distance is both above and below
         the threshold at some point; save them to
         ``<model_name>_respairs_that_changed.npy``.
      2. Compute those distances for every frame of every file.
      3. Fit tICA (pyemma) on the result and save its output to
         ``<model_name>_tica.npz``.

    Parameters
    ----------
    path_to_files : str
        Glob pattern matching the trajectory files.
    model_name : str
        Prefix of the two output files.
    """
    files = glob(path_to_files)
    print("Number of files: {0}".format(len(files)))

    # timestep between frames: 250 picoseconds

    #### A. FEATURE EXTRACTION ####

    ### Step 1: identifying interresidue contacts that change
    scheme = 'ca'
    # NOTE(review): the original comment called this "angstroms", but the
    # value is compared directly with mdtraj distances (nm) -- confirm.
    threshold = 0.8
    frame_stride = 100  # stride within trajectory
    file_stride = 5     # only look at 1 in file_stride trajectories

    # full contact maps for a strided subset of the simulation frames
    survey = []
    for fname in files[::file_stride]:
        thinned = md.load(fname, stride=frame_stride)
        dists, residue_pairs = md.compute_contacts(thinned, scheme=scheme)
        survey.append(dists)
    survey = np.vstack(survey)

    # a contact "changed" if it was seen on both sides of the threshold
    times_above = (survey > threshold).sum(0)
    times_below = (survey < threshold).sum(0)
    changed = (times_above > 0) & (times_below > 0)
    print("Number of contacts that changed: {0}".format(changed.sum()))
    print("Total number of possible contacts: {0}".format(len(residue_pairs)))

    # turn the mask into the list of relevant residue pairs, and save it
    respairs_that_changed = residue_pairs[changed]
    np.save('{0}_respairs_that_changed.npy'.format(model_name), respairs_that_changed)

    ### Step 2: extract these selected features from the full dataset
    files_of_interest = files[::1]  # every trajectory
    X = []
    for position, fname in enumerate(files_of_interest):
        print('{0}/{1}'.format(position, len(files_of_interest)))
        full_traj = md.load(fname)
        selected, _ = md.compute_contacts(full_traj, contacts=respairs_that_changed, scheme=scheme)
        X.append(selected)

    print("Initial dimensionality: {0}".format(X[0].shape[1]))
    print("# frames: {0}".format(np.vstack(X).shape[0]))

    ##### B. KINETIC DISTANCE LEARNING #####
    tica = pyemma.coordinates.tica(X)
    Y = tica.get_output()

    # save tica output
    np.savez_compressed('{0}_tica.npz'.format(model_name), *Y)
    print("Dimensionality after tICA, retaining enough eigenvectors to explain 0.95 of kinetic variation: {0}".format(np.vstack(Y).shape[1]))
from __future__ import print_function import mdtraj as md import numpy as np import itertools t=md.load('1yrc_added.pdb') # reference # Extract interface index group1 = range(0,85) group2 = range(85,98) pairs = list(itertools.product(group1, group2)) A=md.compute_contacts(t, pairs, scheme='closest-heavy') H=[] A1=min(A[0]) # distance array A2=A[1] # residue pairs for x in range(len(A1)): if A1[x]<=1.0: # condition 1 H.append(x) # Extract the index from A2 which satisfy condition1 and save it to A3 A3=A2[H] A4=[] A5=[] # Extract the index of residues which belong to protein for x in range(len(A3)): A4.append(A3[x][0]) A5=list(set(A4)) # the protein residues' index which consist of the interface
# Script fragment: per-pair contact probability over the frames whose
# coordinate `x` lies strictly inside `bounds`. `x`, `bounds`, `Qref` and
# `plt` are defined outside this fragment.

# print(...) with a single argument behaves the same on Python 2 and 3;
# the original print statements are a SyntaxError on Python 3.
print(" Loading BeadBead.dat")
beadbead = np.loadtxt("BeadBead.dat", dtype=str)
sigij = beadbead[:, 5].astype(float)
epsij = beadbead[:, 6].astype(float)
deltaij = beadbead[:, 7].astype(float)
interaction_numbers = beadbead[:, 4].astype(int)
pairs = beadbead[:, :2].astype(int)
pairs -= 1  # shift the file's indices down by one
# Keep only the rows with a non-zero interaction number.
pairs = pairs[interaction_numbers != 0]
sigij = sigij[interaction_numbers != 0]

print(" Computing distances with mdtraj...")
traj = md.load("traj.xtc", top="Native.pdb")
distances = md.compute_contacts(traj, pairs)
# A pair is in contact when its distance is within 1.2 * sigma_ij.
contacts = (distances[0] <= 1.2*sigij).astype(int)
keep_frames = (x > bounds[0]) & (x < bounds[1])
contacts = contacts[keep_frames, :]

print(" Computing contact probability...")
if contacts.shape[0] == 0:
    # The original divided by zero here; fail with a clearer message.
    raise ValueError("no frames have x inside bounds; "
                     "cannot compute contact probability")
probability = contacts.sum(axis=0) / float(contacts.shape[0])
C = np.zeros(Qref.shape, float)
for (i, j), p in zip(pairs, probability):
    C[i, j] = p

print(" Plotting...")
plt.figure()
plt.subplot(1, 1, 1, aspect=1)
def read_and_featurize_custom(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, contact_residues = None):
    """Featurize one trajectory file and dump the features to HDF5.

    Dihedral features are the sin and cos of each requested angle; contact
    features are 'closest-heavy' distances between all pairs of the
    requested residues. When both are requested the columns are stacked,
    dihedrals first.

    Parameters
    ----------
    traj_file : str
        Path of the trajectory to featurize (loaded in full).
    features_dir : str
        Directory receiving ``<condition>.h5``.
    condition : str or None
        Output base name; derived with ``get_condition(traj_file)`` when None.
    dihedral_types : list of str
        Any of "phi", "psi", "chi1", "chi2"; other entries are ignored.
    dihedral_residues, contact_residues : sequence or None
        Residues to use for each feature family; None means none.

    Raises
    ------
    ValueError
        If neither dihedral nor contact residues are given.
    """
    # None defaults used to crash on len(); treat them as "nothing requested".
    if dihedral_residues is None:
        dihedral_residues = []
    if contact_residues is None:
        contact_residues = []
    use_dihedrals = len(dihedral_residues) > 0
    use_contacts = len(contact_residues) > 0
    if not (use_dihedrals or use_contacts):
        raise ValueError("at least one of dihedral_residues or "
                         "contact_residues must be non-empty")

    top = md.load_frame(traj_file,index = 0).topology
    atom_indices = [a.index for a in top.atoms]
    traj = md.load(traj_file, atom_indices=atom_indices)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(traj_file)

    if use_dihedrals:
        index_builders = {"phi": phi_indices, "psi": psi_indices,
                          "chi1": chi1_indices, "chi2": chi2_indices}
        dihedral_indices = [
            index_builders[dihedral_type](fix_topology(top), dihedral_residues)
            for dihedral_type in dihedral_types
            if dihedral_type in index_builders]

        dihedral_angles = []
        for indices in dihedral_indices:
            angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj, indices=indices))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))
        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if use_contacts:
        fixed_traj = fix_traj(traj)
        residues = list(fixed_traj.topology.residues)

        # Map each requested resSeq to topology indices, skipping residues
        # with 5 atoms or fewer.
        distance_residues = [res.index
                             for r in contact_residues
                             for res in residues
                             if res.resSeq == r and len(res._atoms) > 5]
        if len(contact_residues) != len(distance_residues):
            print("Residues are missing")
            print(len(contact_residues))
            print(len(distance_residues))

        pairs = list(itertools.combinations(distance_residues, 2))
        # NOTE(review): residue indices come from fixed_traj's topology but
        # the distances are computed on `traj`; read_and_featurize_iter uses
        # the fixed trajectory here -- confirm which one is intended.
        contact_features = md.compute_contacts(traj, contacts = pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]

        if use_dihedrals:
            manual_features = np.column_stack((manual_features, contact_features))
        else:
            manual_features = contact_features

    print("new features %s has shape: " %traj_file)
    print(np.shape(manual_features))

    if condition is None:
        condition = get_condition(traj_file)

    verbosedump(manual_features, "%s/%s.h5" %(features_dir, condition))
def create_features(ref, prot, lig, d):
    """Select the residue pairs whose contact distance in `ref` is below `d`.

    Parameters
    ----------
    ref : trajectory
        Reference structure passed to ``md.compute_contacts``.
    prot, lig : iterable of int
        Atom indices; each is mapped to the index of its residue, and every
        `prot` residue is paired with every `lig` residue.
    d : float
        Distance cutoff (strictly less than).

    Returns
    -------
    Rows of the residue-pair array returned by ``md.compute_contacts``, one
    per (frame, pair) entry whose distance is below `d`.
    """
    topology = ref.topology
    protein_residues = [topology.atom(i).residue.index for i in prot]
    ligand_residues = [topology.atom(i).residue.index for i in lig]
    candidate_pairs = [(p_res, l_res)
                       for p_res in protein_residues
                       for l_res in ligand_residues]

    distances, residue_pairs = md.compute_contacts(ref, contacts=candidate_pairs)
    # Column index = position of the pair; the frame index is not needed.
    _, close_columns = np.where(distances < d)
    return residue_pairs[close_columns, :]
eps[i][0] = a-33 elif 264<a<342: eps[i][0] = a-67 if 29<b<176: eps[i][1] = b-30 elif 178<b<231: eps[i][1] = b-33 elif 264<b<342: eps[i][1] = b-67 t1=md.load('3SN6-R.pdb') t2=md.load('2RH1.pdb') eps1 = [[eps[i][0], eps[i][1]] for i in range(len(eps))] dist1=md.compute_contacts(t1, contacts=eps1, scheme='closest') dist2=md.compute_contacts(t2, contacts=eps1, scheme='closest') deltaDist = [dist1[0][0][i]-dist2[0][0][i] for i in range(len(dist1[0][0]))] x = [eps[i][2] for i in range(len(eps))] plt.scatter(x , np.absolute(deltaDist)) plt.savefig('fig1.png') plt.show() #################################################################################################################### # calculating dihedrals #################################################################################################################### top1 = md.load('3SN6-R.pdb').topology top2 = md.load('2RH1.pdb').topology dhdrls101 = [] dhdrls102 = []
def calculate_metrics(traj, features, d):
    """Count, per frame, how many of the `features` pairs are closer than 0.5.

    Parameters
    ----------
    traj : trajectory
        Passed to ``md.compute_contacts``.
    features : residue pairs
        Forwarded as the ``contacts`` argument.
    d : unused
        NOTE(review): accepted but never read; the cutoff is the literal
        0.5 below -- confirm whether `d` was meant to be the cutoff.

    Returns
    -------
    Array with one count per frame.
    """
    pair_distances, _ = md.compute_contacts(traj, contacts=features)
    within_cutoff = pair_distances < .5
    return np.sum(within_cutoff, axis=1)
import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np
from msmbuilder import dataset
import seaborn as sns

# Script: compute a contact-distance difference for the ensembler models
# and prepare activation-loop atom selections against the 2SRC reference.

# Plot styling
sns.set_style("whitegrid")
sns.set_context("poster")

# Load trajectory with ensembler models
t_models = md.load("../ensembler-models/traj-refine_implicit_md.xtc", top = "../ensembler-models/topol-renumbered-implicit.pdb")

# Define 'difference' as the distance of residue pair (43, 142) minus the
# distance of residue pair (28, 43); element [0] of each compute_contacts
# result is the distance array.
k295e310 = md.compute_contacts(t_models, [[28,43]])
e310r409 = md.compute_contacts(t_models, [[43,142]])
difference = e310r409[0] - k295e310[0]

# Define 'rmsd' as RMSD of activation loop from 2SRC structure:
# collect the atoms of residues with index 138..158 in both structures.
SRC2 = md.load("../reference-structures/SRC_2SRC_A.pdb")
Activation_Loop_SRC2 = [atom.index for atom in SRC2.topology.atoms if (138 <= atom.residue.index <= 158)]
Activation_Loop_Src = [atom.index for atom in t_models.topology.atoms if (138 <= atom.residue.index <= 158)]

# NOTE(review): the results of these two atom_slice calls are discarded.
# mdtraj's atom_slice presumably returns a new trajectory unless
# inplace=True is passed, in which case SRC2 and t_models stay unsliced
# here -- confirm against the code that computes the RMSD.
SRC2.atom_slice(Activation_Loop_SRC2)
t_models.atom_slice(Activation_Loop_Src)

# Keep the single pair column of 'difference' as a 1-D array.
difference = difference[:,0]