def test_projection_for_pca(self):
    """Check pytraj's PCA projection against a cpptraj reference run.

    The cpptraj side is driven by the ``command`` script defined outside
    this test.  Its ``MyEvecs`` dataset supplies the reference average
    coordinates, eigenvalues and eigenvectors; the last two datasets of the
    state supply the reference projection values.
    """
    traj = pt.load("./data/tz2.nc", "./data/tz2.parm7")

    # cpptraj reference
    state = pt.load_cpptraj_state(command)
    state.run()
    cpp_modes = state.data['MyEvecs']
    cpp_arr_crd = np.array(cpp_modes._get_avg_crd()).reshape(117, 3)

    # pytraj: superpose, average, then superpose again onto that average
    mask = '!@H='
    pt.superpose(traj, mask=mask)
    avg = pt.mean_structure(traj)
    pt.superpose(traj, mask=mask, ref=avg)
    avg2 = pt.mean_structure(traj, mask=mask)

    mat = pt.matrix.covar(traj, mask)
    modes = pt.matrix.diagonalize(mat, n_vecs=2, dtype='dataset')[0]

    aa_eq(cpp_arr_crd, avg2.xyz)

    # Compare absolute values so a sign difference between the two
    # implementations does not fail the test.
    aa_eq(np.abs(modes.eigenvalues), np.abs(cpp_modes.eigenvalues))
    aa_eq(np.abs(modes.eigenvectors), np.abs(cpp_modes.eigenvectors))

    projection_data = pt.all_actions.projection(
        traj,
        mask=mask,
        average_coords=avg2.xyz,
        eigenvalues=modes.eigenvalues,
        eigenvectors=modes.eigenvectors,
        scalar_type='covar')
    aa_eq(np.abs(projection_data),
          np.abs(state.data[-2:].values),
          decimal=3)
def test_mass(self):
    """Compare mass-weighted rotation matrices from pytraj with cpptraj's."""
    traj = self.traj
    fit_mask = '@CA,C,N,O'

    # cpptraj output: matrices saved by the rms command
    cpptraj_cmd = '''
    reference avg.pdb
    rms R0 reference @CA,C,N,O savematrices mass
    '''
    state = pt.load_batch(traj, cpptraj_cmd)
    state.run()
    reference_mats = state.data[-1].values

    # pytraj output: matrices only
    mean_frame = pt.mean_structure(traj)
    rot_mats = pt.calc_rotation_matrix(
        traj, ref=mean_frame, mask=fit_mask, mass=True)
    assert rot_mats.shape == (traj.n_frames, 3, 3), 'mat shape'
    aa_eq(rot_mats.flatten(), reference_mats.flatten())

    # pytraj output: matrices together with the rmsd values
    mean_frame = pt.mean_structure(traj)
    rot_mats_2, rmsd_values = pt.calc_rotation_matrix(
        traj, ref=mean_frame, mask=fit_mask, mass=True, with_rmsd=True)
    aa_eq(rot_mats_2.flatten(), reference_mats.flatten())
    assert pt.tools.rmsd(rmsd_values, state.data['R0']) < 1E-3
def test_autoimage(self):
    """Mean structure must match whether autoimage is applied up front or
    requested while iterating."""
    traj = pt.iterload(fn('tz2.ortho.nc'), fn('tz2.ortho.parm7'))
    ca_mask = '@CA'

    # Sliced copy, autoimaged explicitly before averaging.
    imaged = traj[:]
    imaged.autoimage()
    expected = pt.mean_structure(imaged, ca_mask)

    # Same trajectory, autoimage requested through the frame iterator.
    on_the_fly = pt.mean_structure(traj(autoimage=True), ca_mask)

    aa_eq(expected.xyz, on_the_fly.xyz)
def test_autoimage(self):
    """Mean structure must match whether autoimage is applied up front or
    requested while iterating."""
    traj = pt.iterload("data/tz2.ortho.nc", "data/tz2.ortho.parm7")
    ca_mask = '@CA'

    # Sliced copy, autoimaged explicitly before averaging.
    imaged = traj[:]
    imaged.autoimage()
    expected = pt.mean_structure(imaged, ca_mask)

    # Same trajectory, autoimage requested through the frame iterator.
    on_the_fly = pt.mean_structure(traj(autoimage=True), ca_mask)

    aa_eq(expected.xyz, on_the_fly.xyz)
def test_comprehensive(self):
    """Exercise mean_structure with several input kinds and options."""
    traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
    picked = [0, 3, 7]

    # Saved cpptraj average: pytraj must reproduce it.
    saved_all = pt.iterload(fn("avg.Tc5b.pdb"), traj.top)[0]

    # positional and keyword forms
    by_position = mean_structure(traj)
    aa_eq(by_position.xyz, saved_all.xyz, decimal=3)
    by_keyword = mean_structure(traj=traj)
    aa_eq(by_keyword.xyz, saved_all.xyz, decimal=3)

    # a list of trajectories is accepted (result is not compared)
    mean_structure(traj=[traj, traj[:3]], top=traj.top)

    # frame iterator input
    from_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
    saved_subset = pt.iterload(
        fn("avg.Tc5b.frame_2_to_8_skip_2.pdb"), traj.top)[0]
    aa_eq(from_iter.xyz, saved_subset.xyz, decimal=3)

    # CA atoms only: string mask, atom indices, and frame_indices
    ca_by_mask = mean_structure(traj[picked], '@CA', top=traj.top)
    ca_indices = pt.select('@CA', traj.top)
    ca_by_index = mean_structure(traj[picked], ca_indices, top=traj.top)
    ca_by_frames = mean_structure(traj, mask='@CA', frame_indices=picked)
    aa_eq(ca_by_mask.xyz, ca_by_frames.xyz, decimal=3)
    aa_eq(ca_by_index.xyz, ca_by_frames.xyz, decimal=3)

    # get_coordinates on a frame iterator vs. collecting frames by hand
    coords_from_iter = pt.get_coordinates(traj(1, 8, 2))
    collected = []
    for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
        collected.append(frame.xyz.copy())
    coords_collected = np.array(collected)
    aa_eq(coords_from_iter, coords_collected, decimal=3)

    # dtype='traj' must return a Trajectory
    as_traj = mean_structure(
        traj, mask='@CA', frame_indices=picked, dtype='traj')
    assert isinstance(as_traj, Trajectory), 'must be Trajectory'
    aa_eq(as_traj.xyz, [ca_by_frames.xyz], decimal=3)

    # an unsupported dtype must raise ValueError
    with pytest.raises(ValueError):
        pt.mean_structure(traj, dtype='trajxyz')
def prepare(self):
    '''Load the trajectory, report basic information, then align and fit it.

    Sets ``self.atoms``, ``self.frames`` and ``self.avg`` and finishes by
    calling ``self.initialize_arrays()``.
    '''
    self.load_traj()
    traj = self.traj
    self.atoms = traj.n_atoms
    self.frames = traj.n_frames

    # NOTE(review): the 'residues' line prints the atom count (self.atoms);
    # confirm whether the label or the value is the intended one.
    print('\nPROTEIN INFORMATION')
    print('PDB code: {}'.format(self.PDB))
    print('residues: {}'.format(self.atoms))
    print('frames: {}'.format(self.frames))
    print('temperature: {}'.format(self.temperature))

    # Rotate onto the principal axes, then move the molecule to the origin.
    pt.principal_axes(traj, dorotation=True)
    pt.center(traj, center='origin')

    # Two-pass rms fit: default reference first, then the average structure.
    pt.rmsd(traj)
    self.avg = pt.mean_structure(traj)
    pt.rmsd(traj, ref=self.avg)

    self.initialize_arrays()
def test_nomass(self):
    """Compare rotation matrices (no mass weighting) with cpptraj's."""
    traj = self.traj
    fit_mask = '@CA,C,N,O'

    # Case 1: explicit reference structure.
    cmd_with_ref = '''
    reference avg.pdb
    rms R0 reference @CA,C,N,O savematrices
    '''
    state = pt.load_batch(traj, cmd_with_ref)
    state.run()
    reference_mats = state.data[-1].values

    mean_frame = pt.mean_structure(traj)
    rot_mats = pt.calc_rotation_matrix(traj, ref=mean_frame, mask=fit_mask)
    assert rot_mats.shape == (traj.n_frames, 3, 3), 'mat shape'
    aa_eq(rot_mats.flatten(), reference_mats.flatten())

    # Case 2: no reference specified on either side.
    cmd_default_ref = '''
    rms R0 @CA,C,N,O savematrices
    '''
    state = pt.load_batch(traj, cmd_default_ref)
    state.run()
    reference_mats = state.data[-1].values

    rot_mats = pt.calc_rotation_matrix(traj, mask=fit_mask)
    aa_eq(rot_mats.flatten(), reference_mats.flatten())
def test_pmap_average_structure(self):
    """pmap must reproduce the serial mean structure for 2, 3 and 4 cores."""
    traj = pt.iterload("data/tz2.nc", "data/tz2.parm7")
    ca_mask = '@CA'

    serial_frame = pt.mean_structure(traj, ca_mask)
    expected_xyz = serial_frame.xyz

    for n_cores in (2, 3, 4):
        parallel_frame = pt.pmap(
            pt.mean_structure, traj, ca_mask, n_cores=n_cores)
        aa_eq(parallel_frame.xyz, expected_xyz)
def test_dataset_coords_ref(self):
    """The frame from the 'AVG' dataset must equal pytraj's rms-fitted mean."""
    traj = pt.iterload('data/tz2.nc', 'data/tz2.parm7')
    expected = pt.mean_structure(traj(rmsfit=(0, '!@H=')))
    cpp_state = self.state

    # Repeat the lookup many times to make sure this does not fail because
    # the underlying memory was freed.
    n_repeats = 20
    for _ in range(n_repeats):
        frame_from_dataset = cpp_state.data['AVG'].get_frame()
        aa_eq(expected.xyz, frame_from_dataset.xyz)
def test_comprehensive(self):
    """Exercise mean_structure with several input kinds and options."""
    traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")
    picked = [0, 3, 7]

    # Saved cpptraj average: pytraj must reproduce it.
    saved_all = pt.iterload("./data/avg.Tc5b.pdb", traj.top)[0]

    # positional and keyword forms
    by_position = mean_structure(traj)
    aa_eq(by_position.xyz, saved_all.xyz, decimal=3)
    by_keyword = mean_structure(traj=traj)
    aa_eq(by_keyword.xyz, saved_all.xyz, decimal=3)

    # a list of trajectories is accepted (result is not compared)
    mean_structure(traj=[traj, traj[:3]], top=traj.top)

    # frame iterator input
    from_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
    saved_subset = pt.iterload(
        "./data/avg.Tc5b.frame_2_to_8_skip_2.pdb", traj.top)[0]
    aa_eq(from_iter.xyz, saved_subset.xyz, decimal=3)

    # CA atoms only: string mask, atom indices, and frame_indices
    ca_by_mask = mean_structure(traj[picked], '@CA', top=traj.top)
    ca_indices = pt.select('@CA', traj.top)
    ca_by_index = mean_structure(traj[picked], ca_indices, top=traj.top)
    ca_by_frames = mean_structure(traj, mask='@CA', frame_indices=picked)
    aa_eq(ca_by_mask.xyz, ca_by_frames.xyz, decimal=3)
    aa_eq(ca_by_index.xyz, ca_by_frames.xyz, decimal=3)

    # get_coordinates on a frame iterator vs. collecting frames by hand
    coords_from_iter = pt.get_coordinates(traj(1, 8, 2))
    collected = []
    for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
        collected.append(frame.xyz.copy())
    coords_collected = np.array(collected)
    aa_eq(coords_from_iter, coords_collected, decimal=3)

    # dtype='traj' must return a Trajectory
    as_traj = mean_structure(
        traj, mask='@CA', frame_indices=picked, dtype='traj')
    assert isinstance(as_traj, Trajectory), 'must be Trajectory'
    aa_eq(as_traj.xyz, ca_by_frames.xyz, decimal=3)

    # an unsupported dtype must raise ValueError
    with self.assertRaises(ValueError):
        pt.mean_structure(traj, dtype='trajxyz')
def test_autoimage_with_rmsfit(self):
    """mean_structure with autoimage/rmsfit options must match doing the
    autoimage and superpose steps explicitly beforehand."""
    traj = pt.iterload(fn('tz2.ortho.nc'), fn('tz2.ortho.parm7'))
    ca_mask = '@CA'

    # Default superpose reference vs. rmsfit=0.
    prepared = traj[:]
    pt.autoimage(prepared).superpose()
    expected = pt.mean_structure(prepared, ca_mask)
    via_iterator = pt.mean_structure(
        traj(autoimage=True, rmsfit=0), ca_mask)
    aa_eq(expected.xyz, via_iterator.xyz)

    # Reference index 3, starting again from a fresh copy.
    prepared = traj[:]
    pt.autoimage(prepared).superpose(ref=3)
    expected = pt.mean_structure(prepared, ca_mask)
    via_iterator = pt.mean_structure(
        traj(autoimage=True, rmsfit=3), ca_mask)
    via_keywords = pt.mean_structure(
        traj, autoimage=True, rmsfit=3, mask=ca_mask)
    aa_eq(expected.xyz, via_iterator.xyz)
    aa_eq(expected.xyz, via_keywords.xyz)

    # Selected frames only, starting again from fresh copies.
    frame_indices = [0, 8, 5]
    prepared = traj[frame_indices]
    untouched = traj[frame_indices]
    prepared.autoimage().superpose(ref=-1)
    expected = pt.mean_structure(prepared, ca_mask)

    # ref=5 corresponds to the original (unsliced) frame index.
    # pytraj.TrajectoryIterator input
    from_iterator = pt.mean_structure(
        traj,
        autoimage=True,
        rmsfit=5,
        mask=ca_mask,
        frame_indices=frame_indices)
    # pytraj.Trajectory input, already restricted to the selected frames
    from_subset = pt.mean_structure(
        untouched, autoimage=True, rmsfit=-1, mask=ca_mask)
    from_full_copy = pt.mean_structure(
        traj[:],
        autoimage=True,
        rmsfit=5,
        mask=ca_mask,
        frame_indices=frame_indices)

    aa_eq(expected.xyz, from_iterator.xyz)
    aa_eq(expected.xyz, from_subset.xyz)
    aa_eq(expected.xyz, from_full_copy.xyz)
def clean_traj(self):
    """Align ``self.traj``, centre it, and rms-fit it to its own average.

    Stores the average structure in ``self.avg``.
    """
    traj = self.traj

    # Rotate onto the principal axes, then move the molecule to the origin.
    pt.principal_axes(traj, dorotation=True)
    pt.center(traj, center='origin')

    # Two-pass rms fit: default reference first, then the average structure.
    pt.rmsd(traj)
    self.avg = pt.mean_structure(traj)
    pt.rmsd(traj, ref=self.avg)
def test_autoimage_with_rmsfit(self):
    """mean_structure with autoimage/rmsfit options must match doing the
    autoimage and superpose steps explicitly beforehand."""
    traj = pt.iterload("data/tz2.ortho.nc", "data/tz2.ortho.parm7")
    ca_mask = '@CA'

    # Default superpose reference vs. rmsfit=0.
    prepared = traj[:]
    pt.autoimage(prepared).superpose()
    expected = pt.mean_structure(prepared, ca_mask)
    via_iterator = pt.mean_structure(
        traj(autoimage=True, rmsfit=0), ca_mask)
    aa_eq(expected.xyz, via_iterator.xyz)

    # Reference index 3, starting again from a fresh copy.
    prepared = traj[:]
    pt.autoimage(prepared).superpose(ref=3)
    expected = pt.mean_structure(prepared, ca_mask)
    via_iterator = pt.mean_structure(
        traj(autoimage=True, rmsfit=3), ca_mask)
    via_keywords = pt.mean_structure(
        traj, autoimage=True, rmsfit=3, mask=ca_mask)
    aa_eq(expected.xyz, via_iterator.xyz)
    aa_eq(expected.xyz, via_keywords.xyz)

    # Selected frames only, starting again from fresh copies.
    frame_indices = [0, 8, 5]
    prepared = traj[frame_indices]
    untouched = traj[frame_indices]
    prepared.autoimage().superpose(ref=-1)
    expected = pt.mean_structure(prepared, ca_mask)

    # ref=5 corresponds to the original (unsliced) frame index.
    # pytraj.TrajectoryIterator input
    from_iterator = pt.mean_structure(
        traj,
        autoimage=True,
        rmsfit=5,
        mask=ca_mask,
        frame_indices=frame_indices)
    # pytraj.Trajectory input, already restricted to the selected frames
    from_subset = pt.mean_structure(
        untouched, autoimage=True, rmsfit=-1, mask=ca_mask)
    from_full_copy = pt.mean_structure(
        traj[:],
        autoimage=True,
        rmsfit=5,
        mask=ca_mask,
        frame_indices=frame_indices)

    aa_eq(expected.xyz, from_iterator.xyz)
    aa_eq(expected.xyz, from_subset.xyz)
    aa_eq(expected.xyz, from_full_copy.xyz)
def test_projection_for_pca(self):
    """Check pytraj's PCA projection against a cpptraj reference run.

    The cpptraj side is driven by the ``command`` script defined outside
    this test.  Its ``MyEvecs`` dataset supplies the reference average
    coordinates, eigenvalues and eigenvectors; the last two datasets of the
    state supply the reference projection values.
    """
    traj = pt.load(fn('tz2.nc'), fn('tz2.parm7'))

    # cpptraj reference
    state = pt.load_cpptraj_state(command)
    state.run()
    cpp_modes = state.data['MyEvecs']
    cpp_arr_crd = np.array(cpp_modes._get_avg_crd()).reshape(117, 3)

    # pytraj: superpose, average, then superpose again onto that average
    mask = '!@H='
    pt.superpose(traj, mask=mask)
    avg = pt.mean_structure(traj)
    pt.superpose(traj, mask=mask, ref=avg)
    avg2 = pt.mean_structure(traj, mask=mask)

    mat = pt.matrix.covar(traj, mask)
    modes = pt.matrix.diagonalize(mat, n_vecs=2, dtype='dataset')[0]

    aa_eq(cpp_arr_crd, avg2.xyz)

    # Compare absolute values so a sign difference between the two
    # implementations does not fail the test.
    aa_eq(np.abs(modes.eigenvalues), np.abs(cpp_modes.eigenvalues))
    aa_eq(np.abs(modes.eigenvectors), np.abs(cpp_modes.eigenvectors))

    projection_data = pt.all_actions.projection(
        traj,
        mask=mask,
        average_coords=avg2.xyz,
        eigenvalues=modes.eigenvalues,
        eigenvectors=modes.eigenvectors,
        scalar_type='covar')
    aa_eq(np.abs(projection_data),
          np.abs(state.data[-2:].values),
          decimal=3)
def test_RMSF(self):
    """pt.rmsf after a two-pass superpose must match the last dataset
    produced by the cpptraj script below."""
    traj = pt.iterload("./data/tz2.nc", "./data/tz2.parm7")

    # cpptraj reference
    cpptraj_script = '''
    rms first
    average crdset MyAvg
    run
    rms ref MyAvg
    atomicfluct out fluct.agr'''
    state = pt.load_batch(traj, cpptraj_script)
    state.run()

    # pytraj: superpose to frame 0, average, superpose to the average
    fitted = traj[:]
    pt.superpose(fitted, ref=0)
    mean_frame = pt.mean_structure(fitted)
    pt.superpose(fitted, ref=mean_frame)
    fluctuations = pt.rmsf(fitted)

    aa_eq(fluctuations, state.data[-1].values)
def test_iter_options(self):
    """pmap with iter_options must reproduce the serial results."""
    traj = pt.iterload("data/tz2.ortho.nc", "data/tz2.ortho.parm7")

    # Serial references.
    imaged = traj[:].autoimage()
    fitted = imaged.rmsfit(ref=0)
    expected_avg = pt.mean_structure(fitted)
    expected_radgyr = pt.radgyr(traj, '@CA')

    # Autoimage, then rms fit to the 1st frame, then compute the mean
    # structure -- all requested through iter_options.
    mean_options = {'autoimage': True, 'rmsfit': 0}
    for n_cores in (2, 3):
        parallel_avg = pt.pmap(
            pt.mean_structure,
            traj,
            iter_options=mean_options,
            n_cores=n_cores)
        aa_eq(expected_avg.xyz, parallel_avg.xyz)

        # This call does not pass n_cores.
        radgyr_result = pt.pmap(
            pt.radgyr, traj, iter_options={'mask': '@CA'})
        radgyr_array = pt.tools.dict_to_ndarray(radgyr_result)
        aa_eq(radgyr_array[0], expected_radgyr)
def test_RMSF(self):
    """pt.rmsf after a two-pass superpose must match the last dataset
    produced by the cpptraj script below."""
    traj = pt.iterload(fn('tz2.nc'), fn('tz2.parm7'))

    # cpptraj reference
    cpptraj_script = '''
    rms first
    average crdset MyAvg
    run
    rms ref MyAvg
    atomicfluct out fluct.agr'''
    state = pt.load_batch(traj, cpptraj_script)
    state.run()

    # pytraj: superpose to frame 0, average, superpose to the average
    fitted = traj[:]
    pt.superpose(fitted, ref=0)
    mean_frame = pt.mean_structure(fitted)
    pt.superpose(fitted, ref=mean_frame)
    fluctuations = pt.rmsf(fitted)

    aa_eq(fluctuations, state.data[-1].values)
def test_iter_options(self):
    """pmap with iter_options must reproduce the serial results."""
    traj = pt.iterload("data/tz2.ortho.nc", "data/tz2.ortho.parm7")

    # Serial references.
    imaged = traj[:].autoimage()
    fitted = imaged.rmsfit(ref=0)
    expected_avg = pt.mean_structure(fitted)
    expected_radgyr = pt.radgyr(traj, '@CA')

    # Autoimage, then rms fit to the 1st frame, then compute the mean
    # structure -- all requested through iter_options.
    mean_options = {'autoimage': True, 'rmsfit': 0}
    for n_cores in (2, 3):
        parallel_avg = pt.pmap(
            pt.mean_structure,
            traj,
            iter_options=mean_options,
            n_cores=n_cores)
        aa_eq(expected_avg.xyz, parallel_avg.xyz)

        # This call does not pass n_cores.
        radgyr_result = pt.pmap(
            pt.radgyr, traj, iter_options={'mask': '@CA'})
        radgyr_array = pt.tools.dict_to_ndarray(radgyr_result)
        aa_eq(radgyr_array[0], expected_radgyr)
import pytraj as pt

# Example: print the mean structure of the '@CA' selection of entry "1l2y".

# presumably fetches the structure by its PDB ID -- confirm against the
# pytraj documentation for load_pdb_rcsb
pdb = pt.load_pdb_rcsb("1l2y")

# average restricted to the '@CA' mask
avg_pdb = pt.mean_structure(pdb, '@CA')
print(avg_pdb)
def calc_(traj=traj):
    """Print the mean structure of ``traj``.

    The default for ``traj`` is whatever object the name ``traj`` refers to
    in the enclosing scope when this function is defined.
    """
    print(pt.mean_structure(traj))
from sklearn.decomposition import PCA
import pytraj as pt

# Example: two-component PCA of a trajectory's Cartesian coordinates with
# scikit-learn, after superposing the frames with pytraj.

# We use the `load` method to load all data into memory. This is good for
# small data sizes; use `pytraj.iterload` for out-of-core trajectories.
traj = pt.load('../tests/data/tz2.nc', '../tests/data/tz2.parm7')
pca = PCA(n_components=2)

# superpose to the 1st frame
pt.superpose(traj, ref=0, mask='!@H=')

# create the average structure
avg = pt.mean_structure(traj)

# superpose all structures to the average frame
pt.superpose(traj, ref=avg, mask='!@H=')

# Perform the PCA calculation and get the transformed coordinates.
# We need to reshape the 3D traj.xyz array to 2D to make sklearn happy.
# Make a new traj by stripping all H atoms.
traj_new = traj['!@H=']
xyz_2d = traj_new.xyz.reshape(traj_new.n_frames, traj_new.n_atoms * 3)
print(xyz_2d.shape)  # (n_frames, n_atoms * 3)

reduced_cartesian = pca.fit_transform(xyz_2d)
print(reduced_cartesian.shape)  # (n_frames, n_components)

# NOTE(review): `plt` is not imported anywhere in the visible snippet --
# presumably matplotlib.pyplot is imported elsewhere; confirm.
plt.figure()