def test_function_featurizer():
    # Three ways of producing the phi feature for the first alanine
    # dipeptide trajectory are computed and must agree with each other.
    first_traj = fetch_alanine_dipeptide()["trajectories"][0]

    # Atom indices of the dihedral used to compute phi for ala.
    phi_atoms = [[4, 6, 8, 14]]

    # 1) A plain function plus keyword arguments handed over via func_args.
    via_func_args = FunctionFeaturizer(
        compute_dihedrals, func_args={"indices": phi_atoms}
    ).transform([first_traj])

    # 2) A function wrapped in another function, needing no extra arguments.
    def funcception(trj):
        # Only element 1 of compute_phi's result is kept.
        return compute_phi(trj)[1]

    via_wrapper = FunctionFeaturizer(funcception).transform([first_traj])

    # 3) The known result from the dedicated dihedral featurizer.
    known = DihedralFeaturizer(['phi'], sincos=False).transform([first_traj])

    # Everything is compared against the func_args variant.
    np.testing.assert_array_almost_equal(via_func_args, via_wrapper)
    np.testing.assert_array_almost_equal(via_func_args, known)
def test_that_all_featurizers_run():
    # Smoke test: each featurizer below only has to construct and transform
    # the alanine dipeptide trajectories without raising; results are unused.
    # TODO: include all featurizers, perhaps with generator tests
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    AtomPairsFeaturizer(pair_indices).transform(trajectories)
    SuperposeFeaturizer(np.arange(15), frame0).transform(trajectories)
    DihedralFeaturizer(["phi", "psi"]).transform(trajectories)
    VonMisesFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is intentionally left out: it doesn't work on
    # ALA dipeptide.

    RMSDFeaturizer(frame0).transform(trajectories)
def test_that_all_featurizers_run():
    # Smoke test for the plain featurizers, followed by a shape check on a
    # SubsetFeatureUnion where every member keeps exactly one feature.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(frame0)

    # The plain featurizers only have to run; their output is not inspected.
    feat_mod = msmbuilder.featurizer
    feat_mod.AtomPairsFeaturizer(pair_indices).transform(trajectories)
    feat_mod.SuperposeFeaturizer(np.arange(15), frame0).transform(trajectories)
    feat_mod.DihedralFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is skipped: it doesn't work on ALA dipeptide.

    feat_mod.RMSDFeaturizer(frame0).transform(trajectories)

    named_parts = [
        ("pairs", subset_featurizer.SubsetAtomPairs(
            pair_indices, frame0, exponent=-1.0)),
        ("cosphi", subset_featurizer.SubsetCosPhiFeaturizer(frame0)),
        ("sinphi", subset_featurizer.SubsetSinPhiFeaturizer(frame0)),
        ("cospsi", subset_featurizer.SubsetCosPsiFeaturizer(frame0)),
        ("sinpsi", subset_featurizer.SubsetSinPsiFeaturizer(frame0)),
    ]
    union = subset_featurizer.SubsetFeatureUnion(named_parts)

    # Select a single feature index (index 0) for each member featurizer.
    n_kept = 1
    union.subsets = [np.arange(n_kept) for _ in range(union.n_featurizers)]

    features = union.transform(trajectories)
    eq(features[0].shape[1], n_kept * union.n_featurizers)
def test_function_featurizer():
    # FunctionFeaturizer is exercised in two configurations and both are
    # checked against DihedralFeaturizer on the first trajectory.
    traj = fetch_alanine_dipeptide()["trajectories"][0]
    batch = [traj]

    # Use the dihedral to compute phi for ala.
    dihedral_atoms = [[4, 6, 8, 14]]

    # Test with args: the indices reach compute_dihedrals through func_args.
    with_args = FunctionFeaturizer(
        compute_dihedrals, func_args={"indices": dihedral_atoms})
    expected = with_args.transform(batch)

    # Test with a function in a function, without any args.
    def funcception(trj):
        return compute_phi(trj)[1]

    nested = FunctionFeaturizer(funcception)
    nested_result = nested.transform(batch)

    # Known results.
    dihedral = DihedralFeaturizer(['phi'], sincos=False)
    dihedral_result = dihedral.transform(batch)

    # Compare all.
    for candidate in (nested_result, dihedral_result):
        np.testing.assert_array_almost_equal(expected, candidate)
def test_alanine_dipeptide():
    # AlphaAngleFeaturizer will produce 0 features here because there are
    # not enough peptides.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    features = msmbuilder.featurizer.AlphaAngleFeaturizer().transform(
        trajectories)
    n_feature_columns = features[0].shape[1]
    assert n_feature_columns == 0
def test_alanine_dipeptide():
    # Will produce 0 features because not enough peptides, so the feature
    # array of the first trajectory must have zero columns.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    alpha_featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer()
    transformed = alpha_featurizer.transform(trajectories)
    first_shape = transformed[0].shape
    assert first_shape[1] == 0
def test_von_mises_featurizer():
    # Part 1: the number of output columns on alanine dipeptide must match
    # the expectations below for each dihedral-type / n_bins combination.
    # Part 2: on the fs peptide, the von Mises values of randomly chosen
    # phi dihedrals must land in the expected columns.
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    n_frames = trajectories[0].n_frames

    featurizer = VonMisesFeaturizer(["phi"], n_bins=18)
    X_all = featurizer.transform(trajectories)
    assert X_all[0].shape == (n_frames, 18), (
        "unexpected shape returned: (%s, %s)" % X_all[0].shape)

    featurizer = VonMisesFeaturizer(["phi", "psi"], n_bins=18)
    X_all = featurizer.transform(trajectories)
    assert X_all[0].shape == (n_frames, 36), (
        "unexpected shape returned: (%s, %s)" % X_all[0].shape)

    featurizer = VonMisesFeaturizer(["phi", "psi"], n_bins=10)
    X_all = featurizer.transform(trajectories)
    assert X_all[0].shape == (n_frames, 20), (
        "unexpected shape returned: (%s, %s)" % X_all[0].shape)

    # Test to make sure results are being put in the right order.
    n_bins = 10
    fs_trajectory = fetch_fs_peptide()["trajectories"][0]
    feat = VonMisesFeaturizer(["phi", "psi"], n_bins=n_bins)
    # Reference phi values are taken from element 0 of the trajectory only.
    _, all_phi = compute_phi(fs_trajectory[0])
    X_all = feat.transform([fs_trajectory])

    # Flat reference list ordered as
    # dihedral1(bin1..binN) dihedral2(bin1..binN) ...
    # feat.loc and feat.kappa are read only after transform() has run.
    all_res = []
    for frame in all_phi:
        for dihedral_value in frame:
            all_res.extend(
                vm.pdf(dihedral_value, loc=feat.loc, kappa=feat.kappa))

    # Check 5 randomly drawn phi dihedrals to make sure that they appear in
    # the right columns for the von Mises bins.
    n_phi = all_phi.shape[1]
    for _ in range(5):
        # Pick a random phi dihedral.
        rndint = np.random.choice(n_phi)
        # The featurizer output is expected to be laid out as all dihedrals
        # for bin1, then all dihedrals for bin2, and so on; hence the
        # stride of n_phi between successive bins of one dihedral.
        indices_to_expect = [n_phi * i + rndint for i in range(n_bins)]
        expected_res = all_res[rndint * n_bins:(rndint + 1) * n_bins]
        # Tolerance-based comparison instead of exact float equality; it
        # also reports the mismatching values on failure.
        np.testing.assert_allclose(
            X_all[0][0, indices_to_expect], expected_res)
def test_alanine_dipeptide():
    # This test takes the rmsd of the 0th set of alanine dipeptide
    # trajectories relative to the 0th frame of the dataset.
    # The test asserts that the first rmsd calculated will be zero
    # (checked here as being below 1e-3).
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    traj = trajectories[0]
    featurizer = msmbuilder.featurizer.StrucRMSDFeaturizer(
        traj, traj[0], range(traj.n_atoms))
    rmsd_values = featurizer.transform(traj)
    first_rmsd = rmsd_values[0]
    assert first_rmsd < 1e-3
def test_SubsetAtomPairs2():
    # Features from the full AtomPairsFeaturizer are passed to eq() together
    # with features from SubsetAtomPairs restricted to subset [0, 1].
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(frame0)

    full = msmbuilder.featurizer.AtomPairsFeaturizer(pair_indices)
    full_features = full.transform(trajectories)

    restricted = subset_featurizer.SubsetAtomPairs(
        pair_indices, frame0, subset=np.array([0, 1]))
    restricted_features = restricted.transform(trajectories)

    # eq() is called pairwise; its return values are not used.
    for subset_x, full_x in zip(restricted_features, full_features):
        eq(subset_x, full_x)
def test_SubsetAtomPairs0():
    # SubsetAtomPairs with its subset attribute set to every pair index is
    # compared, trajectory by trajectory, with the plain AtomPairsFeaturizer.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(frame0)

    reference = msmbuilder.featurizer.AtomPairsFeaturizer(pair_indices)
    reference_features = reference.transform(trajectories)

    # The subset is assigned after construction rather than passed in.
    subset_feat = subset_featurizer.SubsetAtomPairs(pair_indices, frame0)
    subset_feat.subset = np.arange(len(pair_indices))
    subset_features = subset_feat.transform(trajectories)

    # eq() is called pairwise; its return values are not used.
    for got, want in zip(subset_features, reference_features):
        eq(got, want)
def test_SubsetAtomPairs_2():
    # SubsetAtomPairs constructed with a subset covering every pair index is
    # compared, trajectory by trajectory, with the plain AtomPairsFeaturizer.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    reference_features = AtomPairsFeaturizer(pair_indices).transform(
        trajectories)

    every_pair = np.arange(len(pair_indices))
    subset_features = SubsetAtomPairs(
        pair_indices, frame0, subset=every_pair).transform(trajectories)

    # eq() is called pairwise; its return values are not used.
    for got, want in zip(subset_features, reference_features):
        eq(got, want)
def test_von_mises_featurizer():
    # With phi and psi featurized together, the first trajectory's feature
    # array must have one row per frame and the expected column count.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    n_frames = trajectories[0].n_frames

    # (n_bins, expected number of columns)
    cases = ((18, 36), (10, 20))
    for n_bins, n_columns in cases:
        features = VonMisesFeaturizer(
            ["phi", "psi"], n_bins=n_bins).transform(trajectories)
        first = features[0]
        assert first.shape == (n_frames, n_columns), (
            "unexpected shape returned: (%s, %s)" % first.shape)
def test_alanine_dipeptide_basic():
    # This test takes the rmsd of the 0th set of alanine dipeptide
    # trajectories relative to the 0th frame of the dataset.
    # The test asserts that all rmsd's calculated will be equal
    # to the ones that would be calculated straight from mdtraj.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    traj = trajectories[0]

    featurized = StrucRMSDFeaturizer(traj[0]).transform(trajectories[0:1])
    expected = md.rmsd(traj, traj[0])

    # Column 0 holds the rmsd to the single reference frame.
    np.testing.assert_array_almost_equal(
        featurized[0][:, 0], expected, decimal=4)
def test_alanine_dipeptide_basic():
    # The rmsd of the 0th alanine dipeptide trajectory relative to its own
    # 0th frame, as produced by StrucRMSDFeaturizer, must equal the values
    # calculated straight from mdtraj (to 4 decimals).
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    rmsd_featurizer = StrucRMSDFeaturizer(first_traj[0])
    data = rmsd_featurizer.transform(trajectories[0:1])
    featurizer_rmsd = data[0][:, 0]

    mdtraj_rmsd = md.rmsd(first_traj, first_traj[0])

    np.testing.assert_array_almost_equal(
        featurizer_rmsd, mdtraj_rmsd, decimal=4)
def test_two_refs_omitting_indices():
    # This test verifies that the result produced when
    # atom_indices are omitted is the same as the result
    # produced when atom_indices is all atom indices.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    traj = trajectories[0]

    # Explicitly list every atom index.
    every_atom = np.arange(traj.n_atoms)
    with_indices = StrucRMSDFeaturizer(traj[0:2], every_atom).transform(
        trajectories[0:1])

    # Same two reference frames, no indices given.
    without_indices = StrucRMSDFeaturizer(traj[0:2]).transform(
        trajectories[0:1])

    np.testing.assert_array_almost_equal(
        without_indices[0], with_indices[0], decimal=4)
def test_two_refs_omitting_indices():
    # Leaving out atom_indices must give the same result as passing all
    # atom indices explicitly (compared to 4 decimals).
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]
    batch = trajectories[0:1]

    explicit = StrucRMSDFeaturizer(
        first_traj[0:2], np.arange(first_traj.n_atoms))
    explicit_data = explicit.transform(batch)

    implicit = StrucRMSDFeaturizer(first_traj[0:2])
    implicit_data = implicit.transform(batch)

    np.testing.assert_array_almost_equal(
        implicit_data[0], explicit_data[0], decimal=4)
def test_two_refs():
    # This test uses the 0th and 1st frames of the 0th set of
    # adp trajectories as the two reference trajectories and
    # ensures that the rmsd of the 0th frame of the dataset with
    # the 0th reference are identical and the 1st frame of the
    # dataset with the 1st reference are identical.
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    featurizer = msmbuilder.featurizer.StrucRMSDFeaturizer(
        trajectories[0], trajectories[0][0:2],
        range(trajectories[0].n_atoms))
    data = featurizer.transform(trajectories[0])

    # TODO: Figure out why arrays are 3D
    # The absolute difference is compared: without abs() any large negative
    # difference would satisfy "< 1e-3" and the check would pass vacuously.
    assert abs(data[0][0][0] - data[1][0][1]) < 1e-3
    assert abs(data[1][0][0] - data[0][0][1]) < 1e-3
def test_SubsetAtomPairs_3():
    # SubsetAtomPairs restricted to subset [0, 1] is compared with the full
    # AtomPairsFeaturizer; the comparison via eq() is required to raise an
    # AssertionError.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    full_features = AtomPairsFeaturizer(pair_indices).transform(trajectories)
    restricted_features = SubsetAtomPairs(
        pair_indices, frame0, subset=np.array([0, 1])).transform(trajectories)

    mismatch_detected = False
    try:
        for restricted_x, full_x in zip(restricted_features, full_features):
            eq(restricted_x, full_x)
    except AssertionError:
        mismatch_detected = True

    if not mismatch_detected:
        raise AssertionError("Did not raise an assertion!")
def test_different_indices():
    # This test verifies that the rmsd's calculated from
    # different sets of atom indices are not the same,
    # but that the arrays are still the same shape.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    traj = trajectories[0]
    n_atoms = traj.n_atoms
    midpoint = n_atoms // 2

    # Lower half of the atom indices.
    lower = StrucRMSDFeaturizer(traj[0], np.arange(midpoint)).transform(
        trajectories[0:1])
    # Upper half of the atom indices.
    upper = StrucRMSDFeaturizer(
        traj[0], np.arange(midpoint, n_atoms)).transform(trajectories[0:1])

    assert lower[0].shape == upper[0].shape

    # Janky way to show that the arrays shouldn't be equal here: compare
    # the sums of their first columns.
    lower_total = sum(lower[0][:, 0])
    upper_total = sum(upper[0][:, 0])
    assert lower_total != upper_total
def test_two_refs_basic():
    # This test uses the 0th and 1st frames of the 0th set of
    # adp trajectories as the two reference trajectories and
    # ensures that the rmsd of the 0th frame of the dataset with
    # the 0th reference are identical and the 1st frame of the
    # dataset with the 1st reference are identical.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    n_refs = 2

    data = StrucRMSDFeaturizer(trajectories[0][0:n_refs]).transform(
        trajectories[0:1])
    rmsd = data[0]

    # One column of mdtraj-computed rmsd values per reference frame.
    true_rmsd = np.zeros((trajectories[0].n_frames, n_refs))
    for ref_idx in range(n_refs):
        true_rmsd[:, ref_idx] = md.rmsd(
            trajectories[0], trajectories[0][ref_idx])

    np.testing.assert_almost_equal(rmsd[0, 0], rmsd[1, 1], decimal=3)
    np.testing.assert_almost_equal(rmsd[1, 0], rmsd[0, 1], decimal=3)
    np.testing.assert_array_almost_equal(rmsd, true_rmsd, decimal=4)
def test_different_indices():
    # rmsd's calculated from two different sets of atom indices must not be
    # the same, while the resulting arrays keep the same shape.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]
    batch = trajectories[0:1]

    n_atoms = first_traj.n_atoms
    halfway_point = n_atoms // 2

    first_half_feat = StrucRMSDFeaturizer(
        first_traj[0], np.arange(halfway_point))
    first_half = first_half_feat.transform(batch)[0]

    second_half_feat = StrucRMSDFeaturizer(
        first_traj[0], np.arange(halfway_point, n_atoms))
    second_half = second_half_feat.transform(batch)[0]

    assert first_half.shape == second_half.shape

    # Janky way to show that the arrays shouldn't be equal here.
    assert sum(first_half[:, 0]) != sum(second_half[:, 0])
def test_two_refs_basic():
    # The 0th and 1st frames of the 0th adp trajectory serve as the two
    # references. The rmsd of frame 0 to reference 0 must match that of
    # frame 1 to reference 1, the cross terms must match each other, and
    # the whole array must agree with rmsd's computed directly by mdtraj.
    trajectories = fetch_alanine_dipeptide()["trajectories"]

    featurizer = StrucRMSDFeaturizer(trajectories[0][0:2])
    result = featurizer.transform(trajectories[0:1])[0]

    # Fill the reference array column by column, one per reference frame.
    expected = np.zeros((trajectories[0].n_frames, 2))
    for column in (0, 1):
        expected[:, column] = md.rmsd(
            trajectories[0], trajectories[0][column])

    np.testing.assert_almost_equal(result[0, 0], result[1, 1], decimal=3)
    np.testing.assert_almost_equal(result[1, 0], result[0, 1], decimal=3)
    np.testing.assert_array_almost_equal(result, expected, decimal=4)
def test_that_all_featurizers_run():
    # First runs a handful of plain featurizers purely as a smoke test, then
    # checks the output width of a SubsetFeatureUnion in which each member
    # featurizer contributes a single feature.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(frame0)

    # Outputs of these transforms are deliberately ignored.
    msmbuilder.featurizer.AtomPairsFeaturizer(pair_indices).transform(
        trajectories)
    msmbuilder.featurizer.SuperposeFeaturizer(np.arange(15), frame0).transform(
        trajectories)
    msmbuilder.featurizer.DihedralFeaturizer(["phi", "psi"]).transform(
        trajectories)

    # ContactFeaturizer is not exercised: it doesn't work on ALA dipeptide.

    msmbuilder.featurizer.RMSDFeaturizer(frame0).transform(trajectories)

    pairs = subset_featurizer.SubsetAtomPairs(
        pair_indices, frame0, exponent=-1.0)
    cos_phi = subset_featurizer.SubsetCosPhiFeaturizer(frame0)
    sin_phi = subset_featurizer.SubsetSinPhiFeaturizer(frame0)
    cos_psi = subset_featurizer.SubsetCosPsiFeaturizer(frame0)
    sin_psi = subset_featurizer.SubsetSinPsiFeaturizer(frame0)

    union = subset_featurizer.SubsetFeatureUnion([
        ("pairs", pairs),
        ("cosphi", cos_phi),
        ("sinphi", sin_phi),
        ("cospsi", cos_psi),
        ("sinpsi", sin_psi),
    ])

    # Keep only feature index 0 from every member featurizer.
    union.subsets = [np.arange(1) for _ in range(union.n_featurizers)]

    features = union.transform(trajectories)
    expected_width = 1 * union.n_featurizers
    eq(features[0].shape[1], expected_width)
def test_that_all_featurizers_run():
    # Builds a SubsetFeatureUnion from five subset featurizers, keeps a
    # single feature per member, and checks the resulting column count.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    frame0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(frame0)

    members = [
        ("pairs", SubsetAtomPairs(pair_indices, frame0, exponent=-1.0)),
        ("cosphi", SubsetCosPhiFeaturizer(frame0)),
        ("sinphi", SubsetSinPhiFeaturizer(frame0)),
        ("cospsi", SubsetCosPsiFeaturizer(frame0)),
        ("sinpsi", SubsetSinPsiFeaturizer(frame0)),
    ]
    union = SubsetFeatureUnion(members)

    # One selected feature (index 0) per member featurizer.
    n_kept = 1
    union.subsets = [np.arange(n_kept) for _ in range(union.n_featurizers)]

    features = union.transform(trajectories)
    eq(features[0].shape[1], n_kept * union.n_featurizers)
def test_that_all_featurizers_run():
    # A SubsetFeatureUnion whose members each select exactly one feature
    # must produce as many columns as it has member featurizers.
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    pair_member = SubsetAtomPairs(pair_indices, first_frame, exponent=-1.0)
    cos_phi = SubsetCosPhiFeaturizer(first_frame)
    sin_phi = SubsetSinPhiFeaturizer(first_frame)
    cos_psi = SubsetCosPsiFeaturizer(first_frame)
    sin_psi = SubsetSinPsiFeaturizer(first_frame)

    union = SubsetFeatureUnion([
        ("pairs", pair_member),
        ("cosphi", cos_phi),
        ("sinphi", sin_phi),
        ("cospsi", cos_psi),
        ("sinpsi", sin_psi),
    ])

    # Every member gets the single-element subset [0].
    single_feature_subsets = []
    for _ in range(union.n_featurizers):
        single_feature_subsets.append(np.arange(1))
    union.subsets = single_feature_subsets

    transformed = union.transform(trajectories)
    n_columns = transformed[0].shape[1]
    eq(n_columns, 1 * union.n_featurizers)