def partial_transform(self, traj):
    """Featurize one trajectory as its RMSD to every reference frame.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        The molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, shape=(n_frames, n_ref_frames)
        Entry ``[i, j]`` is the RMSD between frame ``i`` of ``traj`` and
        frame ``j`` of the reference trajectory, so there is one feature
        per reference frame.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    selection = self.atom_indices
    # Restrict to the configured atoms only when a selection was given.
    query = traj if selection is None else traj.atom_slice(selection)
    return libdistance.cdist(query, self.sliced_reference_traj, 'rmsd')
def partial_transform(self, traj):
    """Featurize an MD trajectory by RMSD against the reference frames.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray
        The pairwise ``'rmsd'`` distances returned by
        ``libdistance.cdist`` between the (optionally atom-sliced) input
        trajectory and ``self.sliced_reference_traj``: one row per input
        frame, one column per reference frame.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    # Default to the full trajectory; narrow it if atoms were selected.
    sliced_traj = traj
    if self.atom_indices is not None:
        sliced_traj = traj.atom_slice(self.atom_indices)

    return libdistance.cdist(
        sliced_traj, self.sliced_reference_traj, 'rmsd')
def partial_transform(self, traj):
    """Map each frame of a trajectory to its RMSDs from the reference.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, shape=(n_frames, n_ref_frames)
        RMSD of every frame of ``traj`` versus every frame of the
        reference trajectory; the number of features equals the number
        of reference frames.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    has_selection = self.atom_indices is not None
    # Slice to the selected atoms only when a selection is configured.
    frames = traj.atom_slice(self.atom_indices) if has_selection else traj
    reference = self.sliced_reference_traj
    return libdistance.cdist(frames, reference, 'rmsd')
def cdist(XA, XB, metric='euclidean'):
    """Compute the pairwise distance matrix between ``XA`` and ``XB``.

    Parameters
    ----------
    XA, XB : sized collections
        The two data sets; only ``len()`` is required of them here.
    metric : str or callable, default='euclidean'
        A string is forwarded unchanged to ``libdistance.cdist``. A
        callable is invoked as ``metric(XB, XA, i)`` once per element
        ``i`` of ``XA`` and must return the distances from ``XA[i]`` to
        every element of ``XB``.

    Returns
    -------
    d : np.ndarray, shape=(len(XA), len(XB))
        The distance matrix (for string metrics, whatever
        ``libdistance.cdist`` returns).
    """
    if not isinstance(metric, six.string_types):
        # Callable metric: fill the matrix one row of XA at a time.
        n_rows, n_cols = len(XA), len(XB)
        distances = np.empty((n_rows, n_cols))
        for row in range(n_rows):
            distances[row, :] = metric(XB, XA, row)
        return distances

    # Named metric: use the compiled implementation.
    return libdistance.cdist(XA, XB, metric)
def test_dist_double_float_1():
    """Yield checks that ``dist`` matches the first column of ``cdist``.

    One zero-argument check is yielded for every metric in
    ``VECTOR_METRICS`` and for both the double and float fixtures.
    """
    # test without X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            dist_1 = dist(X, Y[0], metric)
            dist_2 = cdist(X, Y, metric)[:, 0]
            # float32 inputs are compared to fewer decimal places.
            decimal = 5 if X.dtype == np.float32 else 10
            # Bind the current values as defaults. A plain closure would
            # look the loop variables up when called, so a runner that
            # drains the generator before executing the checks would
            # test only the final iteration.
            yield lambda a=dist_1, b=dist_2, d=decimal: (
                np.testing.assert_almost_equal(a, b, decimal=d))
def test_cdist_double_float_1():
    """Yield checks that ``cdist`` agrees with SciPy's ``cdist``.

    One zero-argument check is yielded for every metric in
    ``VECTOR_METRICS`` and for both the double and float fixtures.
    """
    # test without X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            cdist_1 = cdist(X, Y, metric)
            cdist_2 = scipy.spatial.distance.cdist(X, Y, metric)
            # float32 inputs are compared to fewer decimal places.
            decimal = 5 if X.dtype == np.float32 else 10
            # Bind the current values as defaults. A plain closure would
            # look the loop variables up when called, so a runner that
            # drains the generator before executing the checks would
            # test only the final iteration.
            yield lambda a=cdist_1, b=cdist_2, d=decimal: (
                np.testing.assert_almost_equal(a, b, decimal=d))
def gromos():
    """Unfinished sketch that scans pairwise RMSD matrices of ``trajs``.

    NOTE(review): this looks like the start of a clustering routine (the
    name suggests the GROMOS algorithm -- confirm), but as written it
    computes values and discards them: nothing is stored in ``centers``
    and nothing is returned. The block nesting below was reconstructed
    from whitespace-collapsed source -- confirm against the original.
    """
    # Bare name expression: has no effect other than raising NameError
    # when no ``trajs`` is defined in an enclosing scope.
    trajs
    rmsdmax = -9999  # running maximum placeholder; never updated below
    centers = {}  # never populated in the visible code
    for traj0 in trajs:
        for traj1 in trajs:
            # All-against-all RMSD between the two trajectories.
            rmsd = libdistance.cdist(traj0, traj1, metric="rmsd")
            # presumably (row, column, value) of the largest entry --
            # verify against maxmatrix's definition
            x, y, v = maxmatrix(rmsd)
            if v >= rmsdmax:
                pass  # TODO: branch body not implemented
def test_cdist_double_float_1():
    """Yield checks comparing ``cdist`` with ``scipy.spatial.distance.cdist``.

    A zero-argument check is produced per metric in ``VECTOR_METRICS``
    and per fixture pair (double precision, then single precision).
    """
    # test without X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            cdist_1 = cdist(X, Y, metric)
            cdist_2 = scipy.spatial.distance.cdist(X, Y, metric)
            # Single-precision results only agree to ~5 decimals.
            decimal = 5 if X.dtype == np.float32 else 10
            # Default arguments freeze this iteration's values; a bare
            # closure would read the loop variables lazily and could end
            # up checking only the last combination if the generator is
            # exhausted before the yielded checks are run.
            yield lambda a=cdist_1, b=cdist_2, d=decimal: (
                np.testing.assert_almost_equal(a, b, decimal=d))
def test_assign_nearest_rmsd_1():
    """Check ``assign_nearest`` with the 'rmsd' metric against ``cdist``.

    The assignments must equal the row-wise argmin of the full RMSD
    distance matrix and the inertia must equal the sum of the selected
    distances (to 6 decimals).
    """
    # rmsd assign nearest without X_indices
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    pairwise = cdist(X_rmsd, Y_rmsd, 'rmsd')
    assert pairwise.shape == (10, 3)

    expected_assignments = pairwise.argmin(axis=1)
    np.testing.assert_array_equal(assignments, expected_assignments)

    # Inertia is the total distance from each frame to its assigned centre.
    expected_inertia = pairwise[np.arange(10), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=6)
def test_assign_nearest_rmsd_2():
    """Check ``assign_nearest`` with 'rmsd' restricted to ``X_indices``.

    The reference is the full RMSD distance matrix, reduced to the rows
    named by ``X_indices`` and cast to double precision.
    """
    # rmsd assign nearest with X_indices
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd", X_indices)
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    full_matrix = cdist(X_rmsd, Y_rmsd, 'rmsd')
    selected = full_matrix[X_indices].astype(np.double)
    assert selected.shape == (5, 3)

    expected_assignments = selected.argmin(axis=1)
    np.testing.assert_array_equal(assignments, expected_assignments)

    # Inertia is the total distance from each selected frame to its centre.
    expected_inertia = selected[np.arange(5), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=5)
def test_canberra_32_2():
    """Check float32 'canberra' ``assign_nearest`` with ``X_indices``.

    Repeats ten times on fresh random data. ``assign_nearest`` may pick
    a different centre than ``argmin`` on the ``cdist`` matrix when
    several centres are tied, so a mismatch is only an error if the
    chosen centre is not at the minimum distance.
    """
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # randint's upper bound is exclusive, so high=10 draws from 0..9,
        # the same range as the deprecated random_integers(low=0, high=9).
        X_indices = random.randint(low=0, high=10, size=5).astype(np.intp)

        assignments, inertia = assign_nearest(X, Y, 'canberra',
                                              X_indices=X_indices)
        cdist_can = cdist(X[X_indices], Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)
        if not np.all(ref == assignments):
            different = np.where(assignments != ref)[0]
            # A disagreement is acceptable only when it is a tie: the
            # distance to the assigned centre must equal the distance to
            # the argmin centre *within the same row*. (Comparing whole
            # rows against the first differing row, as was done before,
            # is trivially true when only one row differs.)
            picked = cdist_can[different, assignments[different]]
            best = cdist_can[different, ref[different]]
            np.testing.assert_array_equal(picked, best)
def test_assign_nearest_float_double_2():
    """Yield checks of ``assign_nearest`` with ``X_indices`` against ``cdist``.

    For every metric in ``VECTOR_METRICS`` and both fixture precisions
    (float32 'canberra' is skipped), two zero-argument checks are
    yielded: one for the assignments and one for the inertia.
    """
    # test with X_indices
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric, X_indices)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist_1 = cdist(X[X_indices], Y, metric=metric)
            # float32 inputs are compared to fewer decimal places.
            decimal = 5 if X.dtype == np.float32 else 10

            # Bind the current values as defaults in both lambdas. Plain
            # closures would read the loop variables when called, so a
            # runner that drains the generator first would check only
            # the final iteration's data.
            yield lambda a=assignments, c=cdist_1: (
                np.testing.assert_array_equal(a, c.argmin(axis=1)))
            yield lambda a=assignments, c=cdist_1, i=inertia, d=decimal: (
                np.testing.assert_almost_equal(
                    i, c[np.arange(5), a].sum(), decimal=d))
def test_canberra_32_2():
    """Check float32 'canberra' ``assign_nearest`` on a subset of rows.

    Runs ten rounds on fresh random data. Ties between centres may be
    broken differently by ``assign_nearest`` and by ``argmin`` on the
    ``cdist`` matrix, so a mismatch is tolerated only when the assigned
    centre is exactly as close as the argmin centre.
    """
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # Exclusive upper bound: high=10 yields indices 0..9, matching
        # the deprecated random_integers(low=0, high=9) call it replaces.
        X_indices = (random.randint(low=0, high=10, size=5)
                     .astype(np.intp))

        assignments, inertia = assign_nearest(X, Y, 'canberra',
                                              X_indices=X_indices)
        cdist_can = cdist(X[X_indices], Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)
        if not np.all(ref == assignments):
            different = np.where(assignments != ref)[0]
            # For every disagreeing row, require a genuine tie: the
            # assigned centre's distance must equal the row minimum.
            # The earlier ``row == row[0]`` test compared differing rows
            # with each other and passed vacuously for a single row.
            picked = cdist_can[different, assignments[different]]
            best = cdist_can[different, ref[different]]
            np.testing.assert_array_equal(picked, best)
def cal_rmsdmatrix(xtcf0, xtcf1, top, sel, dt=1, outfname=None):
    """Compute, save and return the RMSD matrix between two XTC files.

    Parameters
    ----------
    xtcf0, xtcf1 : str
        Paths of the two XTC trajectories. When they are equal the
        trajectory is loaded once and compared with itself.
    top : str
        Path of the PDB file used as topology.
    sel : str
        Atom selection string passed to the topology's ``select``.
    dt : int, default=1
        Stride used when loading the trajectories.
    outfname : str, optional
        Output name without extension. Defaults to
        ``"<name0>_<name1>"``, where each name is the file's base name
        up to its first ``"."``.

    Returns
    -------
    rmsd : np.ndarray
        Upper triangle (``np.triu``) of the pairwise RMSD matrix.
    v, x, y
        The three values returned by ``maxmatrix(rmsd)``, reordered;
        presumably the maximum and its row/column -- verify against
        ``maxmatrix``.

    Notes
    -----
    Writes ``outfname + '.npz'`` with arrays ``a`` (the matrix) and
    ``b`` (``[x, y, v]``).

    NOTE(review): ``np.triu`` is applied even when the two files differ,
    which zeroes the lower triangle of a matrix that is not necessarily
    symmetric -- confirm this is intended.
    """
    if not outfname:
        stem0 = os.path.basename(xtcf0).split(".")[0]
        stem1 = os.path.basename(xtcf1).split(".")[0]
        outfname = "%s_%s" % (stem0, stem1)

    top = md.load_pdb(top)
    ndx = top.top.select(sel)

    xtc0 = md.load_xtc(xtcf0, top=top, atom_indices=ndx, stride=dt)
    # Avoid reading the same file twice for a self-comparison.
    same_file = xtcf0 == xtcf1
    xtc1 = xtc0 if same_file else md.load_xtc(
        xtcf1, top=top, atom_indices=ndx, stride=dt)

    rmsd = np.triu(libdistance.cdist(xtc0, xtc1, metric="rmsd"))

    x, y, v = maxmatrix(rmsd)
    summary = np.array([x, y, v])
    np.savez_compressed(outfname + '.npz', a=rmsd, b=summary)
    return rmsd, v, x, y
def test_canberra_32_1():
    """Check float32 'canberra' ``assign_nearest`` without ``X_indices``.

    Repeats ten times on fresh random data. A mismatch with ``argmin``
    on the ``cdist`` matrix is tolerated only when it is a tie.
    """
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, inertia = assign_nearest(X, Y, 'canberra')
        cdist_can = cdist(X, Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)
        if not np.all(ref == assignments):
            different = np.where(assignments != ref)[0]
            # A disagreement is acceptable only when it is a tie: the
            # distance to the assigned centre must equal the distance to
            # the argmin centre *within the same row*. (Comparing whole
            # rows against the first differing row, as was done before,
            # is trivially true when only one row differs.)
            picked = cdist_can[different, assignments[different]]
            best = cdist_can[different, ref[different]]
            np.testing.assert_array_equal(picked, best)
def test_canberra_32_1():
    """Check float32 'canberra' ``assign_nearest`` on all rows.

    Runs ten rounds on fresh random data and tolerates a mismatch with
    ``argmin`` on the ``cdist`` matrix only when the assigned centre is
    exactly as close as the argmin centre.
    """
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, inertia = assign_nearest(X, Y, 'canberra')
        cdist_can = cdist(X, Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)
        if not np.all(ref == assignments):
            different = np.where(assignments != ref)[0]
            # For every disagreeing row, require a genuine tie: the
            # assigned centre's distance must equal the row minimum.
            # The earlier ``row == row[0]`` test compared differing rows
            # with each other and passed vacuously for a single row.
            picked = cdist_can[different, assignments[different]]
            best = cdist_can[different, ref[different]]
            np.testing.assert_array_equal(picked, best)
def test_cdist_rmsd_1():
    """Check ``cdist`` with the 'rmsd' metric against ``md.rmsd``.

    The reference matrix is assembled one column per frame of ``Y_rmsd``
    by calling ``md.rmsd`` with ``precentered=True``; the two matrices
    must agree to 5 decimals.
    """
    actual = cdist(X_rmsd, Y_rmsd, "rmsd")

    # One md.rmsd call per reference frame; each gives one row here.
    per_reference = []
    for i in range(len(Y_rmsd)):
        per_reference.append(md.rmsd(X_rmsd, Y_rmsd[i], precentered=True))
    # Transpose so rows index X_rmsd frames and columns index Y_rmsd frames.
    expected = np.array(per_reference).T

    np.testing.assert_almost_equal(actual, expected, decimal=5)