def test_trajectory_rmsf(get_fn):
    """Compare ``md.rmsf`` with a NumPy reference, with and without ``parallel``.

    The reference is built from the trajectory after superposing it on its
    own frame 0: the squared deviation from the average structure is averaged
    over the frame and xyz axes, multiplied by 3, and square-rooted.
    """
    traj = md.load(get_fn('traj.h5'))
    for use_parallel in (True, False):
        observed = md.rmsf(traj, traj, 0, parallel=use_parallel)

        # Align to frame 0 before computing the reference fluctuations.
        traj.superpose(traj, 0)
        mean_xyz = np.average(traj.xyz, axis=0)
        squared_dev = (traj.xyz - mean_xyz) ** 2
        expected = np.sqrt(3 * np.mean(squared_dev, axis=(0, 2)))

        # An all-zero result would pass trivially against a broken reference.
        assert np.sum(np.abs(observed)) > 0
        eq(observed, expected, decimal=3)
def group_selection(traj_universe, reference_universe, selection, group_selection):
    """Return the RMSF of one atom group after superposing it on the reference.

    Parameters
    ----------
    traj_universe : trajectory object
        Trajectory to analyse; must provide ``atom_slice``.
    reference_universe : trajectory object
        Reference structure; sliced with the same atom indices.
    selection : str
        Base selection query.
    group_selection : str
        Additional query; combined with ``selection`` via ``' and '``.

    Returns
    -------
    The value of ``md.rmsf(...)`` for the sliced group multiplied by 10
    (presumably a nm -> Angstrom conversion; confirm against the caller).

    Notes
    -----
    ``topology`` is not a parameter: it is looked up in the enclosing/module
    scope and must already be bound when this function is called.
    """
    # Both slices use the same atoms, so evaluate the selection only once.
    atom_indices = topology.select(selection + ' and ' + group_selection)
    new_ref = reference_universe.atom_slice(atom_indices)
    new_traj = traj_universe.atom_slice(atom_indices)

    new_traj.superpose(reference=new_ref, parallel=True)
    print(new_traj)
    return md.rmsf(new_traj, new_ref, parallel=True) * 10
def test_trajectory_rmsf_aligned(get_fn):
    """Check ``md.rmsf`` when alignment and RMSF use different atom sets.

    The trajectory is superposed on frame 99 using the first half of the
    atoms, and the RMSF is then evaluated (with ``reference=None``) on the
    second half only.
    """
    traj = md.load(get_fn('traj.h5'))
    for use_parallel in (True, False):
        # Disjoint atom sets: first half for alignment, second half for RMSF.
        half = int(traj.n_atoms / 2)
        align_idx = range(half)
        measure_idx = range(half, traj.n_atoms)

        traj.superpose(traj, 99, atom_indices=align_idx, parallel=False)
        observed = md.rmsf(traj, None, atom_indices=measure_idx,
                           parallel=use_parallel)

        mean_xyz = np.average(traj.xyz, axis=0)
        squared_dev = (traj.xyz - mean_xyz) ** 2
        per_atom = np.sqrt(3 * np.mean(squared_dev, axis=(0, 2)))
        expected = per_atom[measure_idx]

        # An all-zero result would pass trivially against a broken reference.
        assert np.sum(np.abs(observed)) > 0
        eq(observed, expected, decimal=3)
def rmsf_calculation(system_specs, specs):
    """Compute the per-atom RMSF of one system and return it as a DataFrame.

    Parameters
    ----------
    system_specs : tuple
        ``(trajectory, topology, results_folder, name)``. Forwarded unchanged
        to ``Featurize.df_template`` and
        ``Trajectory.Trajectory.loadTrajectory``.
    specs : tuple
        ``(selection, start, stop, timestep, stride, units_x, units_y, task,
        store_traj, subset)``. Only ``selection``, ``start``, ``stop``,
        ``stride`` and ``units_y`` are used directly in this function; the
        full tuple is also forwarded to the trajectory loader.

    Returns
    -------
    pandas.DataFrame
        RMSF values with one row per selected atom (row index named
        ``'Index'``) and MultiIndex columns built from the names/levels that
        ``Featurize.df_template`` returns. An empty DataFrame is returned
        when the trajectory loader returns ``None``.
    """
    # Unpacking also validates that both tuples have the expected length.
    (trajectory, topology, results_folder, name) = system_specs
    (selection, start, stop, timestep, stride, units_x, units_y,
     task, store_traj, subset) = specs

    # The third element of the template is rebuilt below, so it is ignored.
    names, indexes, _ = Featurize.df_template(system_specs, unit=[units_y])

    traj = Trajectory.Trajectory.loadTrajectory(system_specs, specs)
    if traj is None:
        return pd.DataFrame()

    atom_indices = traj.topology.select(selection)
    traj.atom_slice(atom_indices, inplace=True)
    # Center once up front so rmsf can be told the data is precentered.
    traj.center_coordinates()

    # Target and reference are the same frame window; build it only once.
    frames = traj[start:stop:stride]
    rmsf = md.rmsf(frames, frames, 0, precentered=True)

    rows = pd.Index(np.arange(0, len(atom_indices)), name='Index')
    column_index = pd.MultiIndex.from_product(indexes, names=names)
    return pd.DataFrame(rmsf, columns=column_index, index=rows)
def cal_rmsf_traj(topologyfile: str, trajfile: str, selection='mass >= 2', mode='residue', outfile=None) -> pd.DataFrame:
    """Calculate the root mean square fluctuation for the trajfile.

    Args:
        topologyfile (str): topology file for the trajectory. It is read
            twice: by MDAnalysis (for the segment ids used as chain names)
            and by mdtraj (as the topology of the trajectory).
        trajfile (str): molecular dynamics simulation trajectory file.
        selection (str, optional): mdtraj selection of the atoms used to
            calculate the RMSF. Defaults to 'mass >= 2'.
        mode (str, optional): calculation mode, 'atom' or 'residue'. In
            'residue' mode the per-atom values are averaged per
            (chain, resid, resname). Defaults to 'residue'.
        outfile (str, optional): default is None; when given, the result is
            written to this path as CSV without the index.

    Raises:
        ValueError: mode is neither 'residue' nor 'atom'. Raised before any
            file is read.

    Returns:
        pd.DataFrame: RMSF values. Columns are chain, resid, resname, atomid,
            AtomName, RMSF in 'atom' mode and chain, resid, resname, RMSF in
            'residue' mode.
    """
    # Fail fast: reject a bad mode before the expensive loading and RMSF work.
    if mode not in ('residue', 'atom'):
        raise ValueError('Unsupport mode: %s, only accept residue or atom.' % mode)

    universe = mda.Universe(topologyfile)
    chain_names = universe.segments.segids

    traj = md.load(trajfile, top=topologyfile)
    selected = traj.atom_slice(traj.topology.select(selection))
    # target, reference, frame of reference
    rmsf = md.rmsf(selected, selected, 0)

    column_names = ['chain', 'resid', 'resname', 'atomid', 'AtomName', 'RMSF']
    # Walk atoms chain by chain; the running position indexes into rmsf.
    chain_atoms = (
        (chain, atom)
        for chain in selected.topology.chains
        for atom in chain.atoms
    )
    records = [
        (chain_names[chain.index], atom.residue.resSeq, atom.residue.name,
         atom.serial, atom.name, rmsf[position])
        for position, (chain, atom) in enumerate(chain_atoms)
    ]

    rmsf_df = pd.DataFrame(records, columns=column_names)
    if mode == 'residue':
        rmsf_df = rmsf_df.groupby(
            by=['chain', 'resid', 'resname'], as_index=False
        ).agg({'RMSF': 'mean'})
    if outfile:
        rmsf_df.to_csv(outfile, index=False)
    return rmsf_df
def calc_com(traj_file, top_file, bb=False):
    '''
    Load a trajectory and return, per residue, the center-of-mass trajectory
    and the RMSF values of the atoms used for it.

    The trajectory is loaded as-is; nothing is stripped here, so water and
    ions are presumably expected to have been removed from the input files
    beforehand.

    traj_file : path of the trajectory file
    top_file  : path of the topology file, passed to md.load as ``top``
    bb        : if truthy, use the 'backbone' atoms of each residue;
                otherwise use only the atom named CA

    Returns ``(com_data, rmsf_data)``: two lists with one entry per residue
    index in ``range(traj.n_residues)``. ``com_data[i]`` is the result of
    ``md.compute_center_of_mass`` for the selected atoms of residue i and
    ``rmsf_data[i]`` holds the RMSF values of those same atoms.
    '''
    traj = md.load(traj_file, top=top_file)
    top = traj.topology
    # RMSF is computed once over all atoms and indexed per residue below.
    rmsf = md.rmsf(traj, traj, 0)

    # Only the residue index changes inside the loop; build the prefix once.
    atom_filter = 'backbone' if bb else 'name CA'
    selection_prefix = atom_filter + ' and resid '

    com_data = []
    rmsf_data = []
    for resid in range(traj.n_residues):
        atom_ids = top.select(selection_prefix + str(resid))
        rmsf_data.append(rmsf[atom_ids])
        com_data.append(md.compute_center_of_mass(traj.atom_slice(atom_ids)))
    return com_data, rmsf_data
def compute_rmsf(traj_selection):
    """Return ``md.rmsf`` of ``traj_selection`` measured against itself.

    ``center_coordinates()`` is called on the input first (its return value
    is not used, so the input object itself is what gets centered), which is
    what makes passing ``precentered=True`` valid.
    """
    # Centering up front is reported by the docs to be the faster path;
    # precentered=True is only correct because of this call.
    traj_selection.center_coordinates()
    return md.rmsf(traj_selection, traj_selection, precentered=True)
coords_k = trj.xyz * 10 coords_cb_k = coords_k[:, cb_idx, :] coords_cb_all.append(coords_cb_k) coords_cb_all = np.vstack(coords_cb_all) # score all frames score_list = [] for i in tqdm(range(coords_cb_all.shape[0])): coords = torch.tensor(coords_cb_all[i], dtype=torch.float, device=device) protein = Protein(seq_native, coords, profile_native) energy = protein.get_energy(energy_fn).item() score_list.append(energy) t = md.Trajectory(xyz=coords_cb_all, topology=None) t = t.superpose(t, frame=0) rmsd = md.rmsd(t, t, frame=0) rmsf = md.rmsf(t, t, frame=0) df = pd.DataFrame({'energy': score_list, 'rmsd': rmsd}) df.to_csv(f'{root_dir}/BPTI/BPTI_energy_rmsd.csv', index=False) df = pd.DataFrame({'rmsf': rmsf}) df.to_csv(f'{root_dir}/BPTI/BPTI_rmsf.csv', index=False) if 'val_deep' in md_data_list: root_dir = '/home/hyang/bio/erf/data/decoys/md' trj_dir = f'/home/hyang/bio/openmm/data' pdb_id_list = pd.read_csv(f'{trj_dir}/list', header=None, names=['pdb'])['pdb'].values # pdb_id_list = ['3KXT'] for pdb_id in tqdm(pdb_id_list): pdb_path = f'{root_dir}/{pdb_id}_A_bead.csv' seq_native, coords_native, profile_native = load_protein_bead(pdb_path,'CB', device)
def get_rmsf_data(top, traj, start_frame, stop_frame, stride, selection, superpose, group_selections=None):
    """
    Make RMSF Calculation with Group Selections with mdtraj.

    Parameters
    ----------
    top: topology file
        Also loaded on its own as the reference structure.
    traj : trajectory file
    start_frame : int or None
        Include after this snapshot for your analysis. Applied to the
        trajectory as loaded, i.e. after ``stride`` has been applied.
    stop_frame: int or None
        Include until this snapshot for your analysis (slice semantics, so
        the stop frame itself is excluded).
    stride: int
        It will take snapshots at intervals of the "stride" unit you
        specified for your analysis
    selection: str
        Normal Selection
    superpose: bool
        Will align Snapshots for best fitting
    group_selections: list
        list of domains selections. Each group is superposed and measured
        separately over all loaded frames (``start_frame``/``stop_frame`` are
        not applied to the group calculation).

    Returns
    -------
    tuple or None
        ``(ori_selection_rmsf, selection_rmsf, time, residues)`` where
        ``ori_selection_rmsf`` is a list copy of the RMSF of ``selection``,
        ``selection_rmsf`` is the same data with the leading entries
        overwritten, group after group, by the per-group RMSF values,
        ``time`` is ``traj.time`` of the analysed frames and ``residues`` is
        ``list(range(n_atoms))``. All RMSF values are multiplied by 10.
        If anything raises, the error is printed and ``None`` is returned.

    Notes
    -----
    The per-group results are also stored in the module-level global
    ``groupselections`` (an empty list when ``group_selections`` is None).

    Example
    ----------
    or_rmsf, domain_rmsf, time, residue_list = get_rmsf_data(top='test/protein.pdb',
                                            traj='test/50_frame.dcd',
                                            start_frame=0, stop_frame=49, stride=1,
                                            selection='backbone and name CA',
                                            superpose=True,
                                            group_selections=["backbone and name CA and resid 0 to 20",
                                                              "backbone and name CA and resid 21 to 59",
                                                              "backbone and name CA and resid 60 to 100",
                                                              "backbone and name CA and resid 101 to 115",
                                                              "backbone and name CA and resid 116 to 142"])
    """
    global groupselections

    def _group_rmsf(traj_universe, reference_universe, base_query, group_query):
        # RMSF (x10) of one group after superposing it on the reference.
        # `topology` is bound in the enclosing scope before this is called.
        atom_indices = topology.select(base_query + ' and ' + group_query)
        new_ref = reference_universe.atom_slice(atom_indices)
        new_traj = traj_universe.atom_slice(atom_indices)
        new_traj.superpose(reference=new_ref, parallel=True)
        print(new_traj)
        return md.rmsf(new_traj, new_ref, parallel=True) * 10

    try:
        traj_origin = md.load(traj, top=top, stride=stride)
        ref_origin = md.load(top)
        topology = traj_origin.topology

        # The same indices (taken from the trajectory topology) slice both.
        selected_atoms = topology.select(selection)
        traj = traj_origin.atom_slice(selected_atoms)
        ref = ref_origin.atom_slice(selected_atoms)
        if traj.n_atoms != ref.n_atoms:
            # NOTE(review): kept from the original; re-slicing an already
            # sliced trajectory with the same indices looks unintended.
            traj = traj.atom_slice(selected_atoms)

        # Always rebind the global so a call without groups neither fails
        # with NameError nor reuses results left over from an earlier call.
        if group_selections is not None:
            groupselections = [
                _group_rmsf(traj_origin, ref_origin, selection, query)
                for query in group_selections
            ]
        else:
            groupselections = []

        if superpose:
            traj.superpose(reference=ref, parallel=True)

        # A slice bound of None means "open ended", so one slice covers every
        # combination of given / missing start_frame and stop_frame.
        traj = traj[start_frame:stop_frame]

        origin_rmsf = md.rmsf(traj, ref, parallel=True) * 10
        # Snapshot the untouched values; the array is overwritten below.
        ori_selection_rmsf = list(origin_rmsf)
        selection_rmsf = origin_rmsf

        offset = 0
        for domain_rmsf in groupselections:
            selection_rmsf[offset:offset + len(domain_rmsf)] = domain_rmsf
            offset += len(domain_rmsf)

        return (ori_selection_rmsf, selection_rmsf, traj.time,
                list(range(0, traj.n_atoms)))

    except Exception as error:
        # Best-effort behaviour kept from the original: report and give up.
        print(error)
        print("problem in rmsf calculation")
        return None
for method in methods: #making a fake combined trajectory where each frame is one centroid of the method traj_0 = md.load(path+method+'/'+method+'_0.pdb') for x in range(1,10): for k in os.listdir(path+method+'/'): if k == (method+'_'+str(x)+'.pdb'): print(k) traj = md.load(path+method+'/'+k) joined_traj = traj.join(traj_0) joined_traj = joined_traj.superpose(traj_0,0) joined_traj.save_pdb(home+'pdb_trajs/'+method+'_traj.pdb') #calculating the rmsf rmsf = (md.rmsf(joined_traj,traj_0)) * 10 #mdtraj calculates in nanometers, so need to convert to angstroms print(len(rmsf)) #replacing the b-factor with open(path+'XTAL/XTAL_0.pdb','r') as data: with open(home+'pdb_figures/'+method+'_fig.pdb','w') as writefile: data = data.readlines() writefile.write(data[0]) writefile.write(data[1]) for line,value in zip(data[2:3326],rmsf): val = round(value,3) adjust = str(val).rjust(6) newline = line.replace((line[60:67]), adjust)