示例#1
0
def read_and_featurize(filename, dihedrals=None, stride=10):
    """Featurize residue 93 of one trajectory and dump the result to disk.

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path. The parent directory name is used as
        the condition and the file stem as the prefix of the output file.
    dihedrals : list of str, optional
        Dihedral types passed to ``DihedralFeaturizer``. Defaults to
        ``['chi2']``.
    stride : int, optional
        Only used to build the output file name
        (``<stem>_features_stride<stride>.h5``).

    Returns
    -------
    features
        The value returned by ``DihedralFeaturizer.transform``; the same
        object is written with ``verbosedump``.
    """
    # Build the default per call instead of sharing one mutable list.
    if dihedrals is None:
        dihedrals = ['chi2']

    top = md.load_frame(filename, 0).topology
    # Compare the residue *name* with "POPC". The original compared the
    # Residue object itself with a string, which is not expected to ever be
    # equal, so that filter did nothing.
    atom_indices = [
        atom.index for atom in top.atoms
        if atom.residue.resSeq == 93
        and atom.residue.name != "POPC"
        and str(atom.residue)[0] == "H"
    ]
    print(len(atom_indices))

    # NOTE(review): frames are loaded with a fixed stride of 1000 while the
    # output name advertises `stride` -- confirm which one is intended.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    parts = filename.split("/")
    condition = parts[len(parts) - 2]
    stem = parts[-1].rsplit(".", 1)[0]
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s_features_stride%d.h5" % (
        new_root_dir, condition, stem, stride)

    verbosedump(features, new_file_full)
    return features
示例#2
0
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Compute dihedral features for a whole trajectory and dump them.

    The condition label is the part of the file's base name before the first
    "_" (and before the first "."). Features are written to
    ``combined_features/<condition>_features.h5``. Nothing is returned and
    ``stride`` is not used by this function.
    """
    print("reading and featurizing %s" % filename)

    trajectory = md.load(filename)
    dihedral_features = DihedralFeaturizer(types=dihedrals).transform(
        traj_list=trajectory)

    base_name = filename.split("/")[-1]
    condition = base_name.split("_")[0].split(".")[0]

    feature_shape = np.shape(dihedral_features)
    print("Condition %s has features of shape %s" % (condition, feature_shape))

    out_path = "/scratch/users/enf/b2ar_analysis/combined_features/%s_features.h5" % condition
    verbosedump(dihedral_features, out_path)
示例#3
0
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize one trajectory by its dihedral angles and save the result.

    The output file is named after the condition, i.e. the leading token of
    the trajectory's base name (split on "_" and then on "."). The function
    returns nothing and does not use ``stride``.
    """
    print("reading and featurizing %s" % filename)

    loaded = md.load(filename)
    extractor = DihedralFeaturizer(types=dihedrals)
    feats = extractor.transform(traj_list=loaded)

    path_parts = filename.split("/")
    file_name = path_parts[-1]
    condition = file_name.split("_")[0].split(".")[0]

    message = "Condition %s has features of shape %s" % (
        condition, np.shape(feats))
    print(message)

    target = "/scratch/users/enf/b2ar_analysis/combined_features/%s_features.h5" % condition
    verbosedump(feats, target)
示例#4
0
def read_and_featurize_divided(filename, dihedrals=None, stride=10):
    """Featurize one trajectory, skipping residues whose name starts with "HI".

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path. The parent directory name is the
        condition and the file stem prefixes the output file name.
    dihedrals : list of str, optional
        Dihedral types for ``DihedralFeaturizer``. Defaults to
        ``['phi', 'psi', 'chi2']``.
    stride : int, optional
        Only used in the output file name; every frame is loaded.

    Returns
    -------
    features
        The value returned by ``DihedralFeaturizer.transform``, which is also
        written with ``verbosedump``.
    """
    # Build the default per call instead of sharing one mutable list.
    if dihedrals is None:
        dihedrals = ['phi', 'psi', 'chi2']

    traj_top = md.load_frame(filename, 0).topology
    atom_indices = [
        atom.index for atom in traj_top.atoms
        if not atom.residue.name.startswith("HI")
    ]

    traj = md.load(filename, atom_indices=atom_indices)
    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    parts = filename.split("/")
    condition = parts[len(parts) - 2]
    stem = parts[-1].rsplit(".", 1)[0]
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s_features_stride%d.h5" % (
        new_root_dir, condition, stem, stride)

    verbosedump(features, new_file_full)
    return features
示例#5
0
def read_and_featurize_divided(filename, dihedrals=None, stride=10):
    """Compute dihedral features for every atom outside "HI*" residues.

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path; its parent directory names the
        condition and its stem prefixes the output file.
    dihedrals : list of str, optional
        Dihedral types handed to ``DihedralFeaturizer``. Defaults to
        ``['phi', 'psi', 'chi2']``.
    stride : int, optional
        Appears only in the output file name
        (``<stem>_features_stride<stride>.h5``); loading is not strided.

    Returns
    -------
    features
        The output of ``DihedralFeaturizer.transform``, also saved with
        ``verbosedump``.
    """
    # None sentinel avoids a mutable list shared between calls.
    if dihedrals is None:
        dihedrals = ['phi', 'psi', 'chi2']

    traj_top = md.load_frame(filename, 0).topology
    atom_indices = [
        a.index for a in traj_top.atoms
        if not a.residue.name.startswith("HI")
    ]

    traj = md.load(filename, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    path_parts = filename.split("/")
    condition = path_parts[len(path_parts) - 2]
    dcd_stem = path_parts[-1].rsplit(".", 1)[0]
    new_file = "%s_features_stride%d.h5" % (dcd_stem, stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
示例#6
0
def read_and_featurize(filename, dihedrals=None, stride=10):
    """Featurize residue 93 of one trajectory and dump the result to disk.

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path. The parent directory name is used as
        the condition and the file stem as the prefix of the output file.
    dihedrals : list of str, optional
        Dihedral types passed to ``DihedralFeaturizer``. Defaults to
        ``['chi2']``.
    stride : int, optional
        Only used to build the output file name
        (``<stem>_features_stride<stride>.h5``).

    Returns
    -------
    features
        The value returned by ``DihedralFeaturizer.transform``; the same
        object is written with ``verbosedump``.
    """
    # Build the default per call instead of sharing one mutable list.
    if dihedrals is None:
        dihedrals = ['chi2']

    top = md.load_frame(filename, 0).topology
    # Compare the residue *name* with "POPC". The original compared the
    # Residue object itself with a string, which is not expected to ever be
    # equal, so that filter did nothing.
    atom_indices = [
        atom.index for atom in top.atoms
        if atom.residue.resSeq == 93
        and atom.residue.name != "POPC"
        and str(atom.residue)[0] == "H"
    ]
    print(len(atom_indices))

    # NOTE(review): frames are loaded with a fixed stride of 1000 while the
    # output name advertises `stride` -- confirm which one is intended.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    parts = filename.split("/")
    condition = parts[len(parts) - 2]
    stem = parts[-1].rsplit(".", 1)[0]
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s_features_stride%d.h5" % (
        new_root_dir, condition, stem, stride)

    verbosedump(features, new_file_full)
    return features
示例#7
0
def test_function_featurizer():
    """FunctionFeaturizer results must match DihedralFeaturizer's phi angle."""
    first_traj = AlanineDipeptide().get_cached().trajectories[0]

    # Explicit atom quadruplet for the alanine phi angle, passed via func_args.
    phi_atoms = [[4, 6, 8, 14]]
    with_args = FunctionFeaturizer(
        compute_dihedrals, func_args={"indices": phi_atoms}
    ).transform([first_traj])

    # A wrapped function that needs no extra arguments.
    def phi_values(trj):
        return compute_phi(trj)[1]

    without_args = FunctionFeaturizer(phi_values).transform([first_traj])

    # Reference values from the dedicated featurizer.
    reference = DihedralFeaturizer(['phi'], sincos=False).transform(
        [first_traj])

    np.testing.assert_array_almost_equal(with_args, without_args)
    np.testing.assert_array_almost_equal(with_args, reference)
示例#8
0
def test_function_featurizer():
    """Check FunctionFeaturizer against DihedralFeaturizer on the phi angle."""
    trj0 = AlanineDipeptide().get_cached().trajectories[0]
    as_list = [trj0]

    # Variant 1: a plain function plus keyword arguments (phi atom indices).
    by_indices = FunctionFeaturizer(
        compute_dihedrals, func_args={"indices": [[4, 6, 8, 14]]})
    res_indices = by_indices.transform(as_list)

    # Variant 2: a closure that takes only the trajectory.
    def only_angles(trj):
        return compute_phi(trj)[1]

    res_closure = FunctionFeaturizer(only_angles).transform(as_list)

    # Known-good result.
    res_known = DihedralFeaturizer(['phi'], sincos=False).transform(as_list)

    for other in (res_closure, res_known):
        np.testing.assert_array_almost_equal(res_indices, other)
def test_get_common_features():
    """Common-feature count per protein must equal the featurizer's width."""
    project = load_yaml_file(
        os.path.join(base_dir, "mdl_dir", "project.yaml"))

    # Chain-0 FASTA sequence for each protein listed in the project file.
    aligned = {}
    for name in project["protein_list"]:
        aligned[name] = load_random_traj(project, name).top.to_fasta(chain=0)

    featurizer = DihedralFeaturizer()
    common, _ = _get_common_features(project, featurizer, aligned, False)

    for name in project["protein_list"]:
        traj = load_random_traj(project, name)
        assert len(common[name]) == featurizer.transform(traj)[0].shape[1]
示例#10
0
def test_code_works():
    """Smoke-test fitting a 4-state VonMisesHMM on the ALA2 data."""
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    trajectories = AlanineDipeptide().get_cached().trajectories

    # NOTE(review): the second positional argument is a trajectory slice;
    # confirm which DihedralFeaturizer parameter it is meant to bind to.
    featurizer = DihedralFeaturizer(['phi', 'psi'], trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)

    # The comparison belongs outside len(). The original
    # ``len(hmm.timescales_ == 3)`` only measured the length of a boolean
    # array, so it passed for any non-empty result.
    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
示例#11
0
def test_code_works():
    """Fit a 4-state VonMisesHMM on ALA2 and sanity-check its timescales."""
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    trajectories = AlanineDipeptide().get_cached().trajectories

    # NOTE(review): a trajectory slice is passed as the second positional
    # argument; verify against the DihedralFeaturizer signature.
    featurizer = DihedralFeaturizer(['phi', 'psi'], trajectories[0][0])
    sequences = featurizer.transform(trajectories)

    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)

    # Fixed misplaced parenthesis: ``len(x == 3)`` is the length of a
    # boolean array (truthy whenever x is non-empty), not a length check.
    n_timescales = len(hmm.timescales_)
    assert n_timescales == 3
    assert np.any(hmm.timescales_ > 50)
def test_DihedralFeaturizer_describe_features_nosincos():
    """describe_features must map each raw-angle column to its atom indices.

    25 randomly drawn feature columns are recomputed with
    ``md.compute_dihedrals`` and compared with the featurizer output.
    """
    feat = DihedralFeaturizer(sincos=False)
    picked = np.random.randint(len(trajectories))
    traj = trajectories[picked]
    features = feat.transform([traj])
    df = pd.DataFrame(feat.describe_features(traj))

    for _ in range(25):
        column = np.random.choice(len(df))
        description = df.iloc[column]

        recomputed = md.compute_dihedrals(traj, [description.atominds])
        if feat.sincos:
            # Apply the numpy function named in the description.
            transform = getattr(np, '%s' % description.otherinfo)
            recomputed = transform(recomputed)

        assert (features[0][:, column] == recomputed.flatten()).all()
def test_DihedralFeaturizer_describe_features_nosincos():
    """Spot-check 25 random raw-dihedral features against their description."""
    feat = DihedralFeaturizer(sincos=False)
    rnd_traj = np.random.randint(len(trajectories))
    chosen = trajectories[rnd_traj]
    feature_matrix = feat.transform([chosen])[0]
    df = pd.DataFrame(feat.describe_features(chosen))

    checks_left = 25
    while checks_left > 0:
        checks_left -= 1
        f_index = np.random.choice(len(df))
        row = df.iloc[f_index]

        # Recompute the dihedral straight from the described atom indices.
        value = md.compute_dihedrals(chosen, [row.atominds])
        if feat.sincos:
            value = getattr(np, '%s' % row.otherinfo)(value)

        matches = feature_matrix[:, f_index] == value.flatten()
        assert matches.all()
示例#14
0
def fit_and_transform(directory, stride=5):
    """Fit (or load) a 4-component tICA model and project a reference PDB.

    If the projected-data file does not exist, feature files found under
    ``directory`` are loaded in parallel, a tICA model is fitted (or loaded
    if its file already exists), and the projected features are saved.
    Otherwise both the model and the projected data are loaded from disk.
    The reference structure ``system_B.pdb`` is then featurized and projected
    with the model, and the first four rows of the projection are printed.

    Parameters
    ----------
    directory : str
        Passed to ``get_trajectory_files`` to locate the feature files.
    stride : int, optional
        Only used to build the cache file names.

    Returns
    -------
    None
    """
    # NOTE(review): the two cache names differ ("phi_psi_chi_" vs
    # "phi_psi_chi2_"); they are kept as-is so existing files are still found.
    projected_data_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi_stride%d_projected.h5" % stride
    fit_model_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi2_stride%s_tica_coords.h5" % stride
    active_pdb_file = "/scratch/users/enf/b2ar_analysis/system_B.pdb"

    if os.path.exists(projected_data_filename):
        fit_model = verboseload(fit_model_filename)
        # The original also loaded the cached projection here; the load is
        # kept even though only the model is needed below.
        verboseload(projected_data_filename)
    else:
        print("loading feature files")
        feature_files = get_trajectory_files(directory)
        # The context manager terminates the pool even if map() raises.
        with mp.Pool(mp.cpu_count()) as pool:
            features = pool.map(load_features, feature_files)

        if os.path.exists(fit_model_filename):
            print("loading tICA model")
            fit_model = verboseload(fit_model_filename)
        else:
            print("fitting data to tICA model")
            fit_model = tICA(n_components=4).fit(features)
            verbosedump(fit_model, fit_model_filename)

        # Projection and saving are the same for a fresh or a loaded model.
        transformed_data = fit_model.transform(features)
        verbosedump(transformed_data, projected_data_filename)

    # Keep protein atoms only, minus specific residue numbers, "HI*"
    # residues and the listed capping-residue names.
    excluded_resseqs = {341, 79, 296, 269, 178, 93}
    excluded_names = {"NMA", "NME", "ACE"}
    top = md.load(active_pdb_file).topology
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.is_protein
        and a.residue.resSeq not in excluded_resseqs
        and not a.residue.name.startswith("HI")
        and a.residue.name not in excluded_names
    ]

    active_pdb = md.load(active_pdb_file, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi2'])
    active_pdb_features = featurizer.transform(active_pdb)
    active_pdb_projected = fit_model.transform(active_pdb_features)
    print(active_pdb_projected[0:4])
示例#15
0
def test_pickle():
    """A pickled and restored HMM must predict the same log-probability."""
    trajectories = AlanineDipeptide().get_cached().trajectories
    first = trajectories[0]
    heavy_atoms = first.topology.select('symbol C or symbol O or symbol N')

    sequences = DihedralFeaturizer(['phi', 'psi'], first[0]).transform(
        trajectories)

    original = VonMisesHMM(n_states=4, n_init=1)
    original.fit(sequences)
    logprob_before, _ = original.predict(sequences)

    # Round-trip through an anonymous temporary file.
    with tempfile.TemporaryFile() as handle:
        pickle.dump(original, handle)
        handle.seek(0, 0)
        restored = pickle.load(handle)

    logprob_after, _ = restored.predict(sequences)
    assert logprob_before == logprob_after
示例#16
0
def read_and_featurize(filename, dihedrals=None, stride=10):
    """Featurize the protein atoms of the first chain and save the features.

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path. The parent directory name is the
        condition; the file stem prefixes the output file name.
    dihedrals : list of str, optional
        Dihedral types for ``DihedralFeaturizer``. Defaults to
        ``['phi', 'psi', 'chi2']``.
    stride : int, optional
        Only used in the output file name.

    Returns
    -------
    features
        The output of ``DihedralFeaturizer.transform``, also written with
        ``verbosedump`` below the ``subsampled_features`` directory.
    """
    # Build the default per call instead of sharing one mutable list.
    if dihedrals is None:
        dihedrals = ['phi', 'psi', 'chi2']

    print("reading and featurizing %s" % filename)
    traj = md.load(filename)
    # Atom selection lives on the topology, not on the trajectory, so the
    # original ``md.load(...).select(...)`` could not work. Select indices
    # first, then slice the trajectory down to those atoms.
    # NOTE(review): 'chain A' was translated to MDTraj's 'chainid 0';
    # confirm chain A is the first chain in these files.
    protein_atoms = traj.topology.select('chainid 0 and protein')
    traj = traj.atom_slice(protein_atoms)

    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)
    print("finished featurizing")

    parts = filename.split("/")
    condition = parts[len(parts) - 2]
    new_file = "%s_features_stride%d.h5" % (parts[-1].rsplit(".", 1)[0], stride)
    new_root_dir = "/home/enf/b2ar_analysis/subsampled_features/"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)

    if not os.path.exists(new_condition_dir):
        os.makedirs(new_condition_dir)

    new_file_full = "%s/%s" % (new_condition_dir, new_file)
    print("saving features as %s" % new_file_full)

    verbosedump(features, new_file_full)
    return features
示例#17
0
def read_and_featurize(filename, dihedrals=None, stride=10):
    """Compute dihedral features for chain-0 protein atoms and save them.

    Parameters
    ----------
    filename : str
        "/"-separated trajectory path; its parent directory names the
        condition and its stem prefixes the output file.
    dihedrals : list of str, optional
        Dihedral types handed to ``DihedralFeaturizer``. Defaults to
        ``['phi', 'psi', 'chi2']``.
    stride : int, optional
        Appears only in the output file name.

    Returns
    -------
    features
        The output of ``DihedralFeaturizer.transform``; the same object is
        saved with ``verbosedump``.
    """
    # None sentinel avoids a mutable list shared between calls.
    if dihedrals is None:
        dihedrals = ['phi', 'psi', 'chi2']

    print("reading and featurizing %s" % filename)
    full_traj = md.load(filename)
    # ``select`` is a topology method that returns atom indices; the
    # trajectory itself has to be reduced with ``atom_slice``. The original
    # chained ``.select`` directly on the loaded trajectory.
    # NOTE(review): 'chain A' was rewritten as 'chainid 0' (MDTraj selection
    # syntax); verify that chain A is chain index 0 in the input files.
    selection = full_traj.topology.select('chainid 0 and protein')
    traj = full_traj.atom_slice(selection)

    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)
    print("finished featurizing")

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_stem = directory[-1].rsplit(".", 1)[0]
    new_file = "%s_features_stride%d.h5" % (dcd_stem, stride)
    new_root_dir = "/home/enf/b2ar_analysis/subsampled_features/"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)

    if not os.path.exists(new_condition_dir):
        os.makedirs(new_condition_dir)

    new_file_full = "%s/%s" % (new_condition_dir, new_file)
    print("saving features as %s" % new_file_full)

    verbosedump(features, new_file_full)
    return features
示例#18
0
def fit_and_transform(directory, stride=5):
    """Fit (or load) a 4-component tICA model and project a reference PDB.

    If the projected-data file is missing, feature files found under
    ``directory`` are loaded with a process pool, the tICA model is fitted
    (or loaded when its file exists) and the projection is saved. Otherwise
    model and projection are read from disk. Finally ``system_B.pdb`` is
    featurized, projected with the model, and the first four rows printed.

    Parameters
    ----------
    directory : str
        Passed to ``get_trajectory_files`` to locate the feature files.
    stride : int, optional
        Only used to build the cache file names.

    Returns
    -------
    None
    """
    # NOTE(review): the two cache names differ ("phi_psi_chi_" vs
    # "phi_psi_chi2_"); kept unchanged so existing files are still found.
    projected_data_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi_stride%d_projected.h5" % stride
    fit_model_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi2_stride%s_tica_coords.h5" % stride
    active_pdb_file = "/scratch/users/enf/b2ar_analysis/system_B.pdb"

    if os.path.exists(projected_data_filename):
        fit_model = verboseload(fit_model_filename)
        # Kept from the original: the cached projection is loaded too,
        # although only the model is used afterwards.
        verboseload(projected_data_filename)
    else:
        print("loading feature files")
        feature_files = get_trajectory_files(directory)
        pool = mp.Pool(mp.cpu_count())
        try:
            features = pool.map(load_features, feature_files)
        finally:
            # Release the worker processes even when map() raises.
            pool.terminate()

        if os.path.exists(fit_model_filename):
            print("loading tICA model")
            fit_model = verboseload(fit_model_filename)
        else:
            print("fitting data to tICA model")
            fit_model = tICA(n_components=4).fit(features)
            verbosedump(fit_model, fit_model_filename)

        # Same projection and save step for a fresh or a loaded model.
        transformed_data = fit_model.transform(features)
        verbosedump(transformed_data, projected_data_filename)

    # Keep protein atoms only, minus specific residue numbers, "HI*"
    # residues and the listed capping-residue names.
    skipped_resseqs = (341, 79, 296, 269, 178, 93)
    skipped_names = ("NMA", "NME", "ACE")
    top = md.load(active_pdb_file).topology
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.is_protein
        and a.residue.resSeq not in skipped_resseqs
        and a.residue.name[0:2] != "HI"
        and a.residue.name not in skipped_names
    ]

    active_pdb = md.load(active_pdb_file, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi2'])
    active_pdb_features = featurizer.transform(active_pdb)
    active_pdb_projected = fit_model.transform(active_pdb_features)
    print(active_pdb_projected[0:4])
def test_FeatureSelector_describe_features():
    """A FeatureSelector's description must be its members' concatenated."""
    traj = trajectories[np.random.randint(len(trajectories))]

    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f_ca.transform([traj])
    ca_desc = pd.DataFrame(f_ca.describe_features(traj))

    f_dih = DihedralFeaturizer()
    f_dih.transform([traj])
    dih_desc = pd.DataFrame(f_dih.describe_features(traj))

    by_name = {"ca": ca_desc, "dih": dih_desc}

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f_comb.transform([traj])
    combined = pd.DataFrame(f_comb.describe_features(traj))
    assert len(combined) == len(ca_desc) + len(dih_desc)

    expected = pd.concat([by_name[key] for key in f_comb.feat_list])
    # lets randomly compare 40 features
    for row in np.random.choice(range(len(combined)), 40):
        for col in combined.columns:
            assert eq(combined.iloc[row][col], expected.iloc[row][col])
def test_FeatureSelector_describe_features():
    """Compare FeatureSelector.describe_features with its sub-featurizers."""
    rnd_traj = np.random.randint(len(trajectories))
    sample = trajectories[rnd_traj]

    def describe(featurizer):
        # Transform first, as the original did, then tabulate the description.
        featurizer.transform([sample])
        return pd.DataFrame(featurizer.describe_features(sample))

    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    df_ca = describe(f_ca)

    f_dih = DihedralFeaturizer()
    df_dih = describe(f_dih)

    frames = {"ca": df_ca, "dih": df_dih}

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    df_comb = describe(f_comb)
    assert len(df_comb) == len(df_ca) + len(df_dih)

    df_stacked = pd.concat([frames[name] for name in f_comb.feat_list])
    # lets randomly compare 40 features
    for i in np.random.choice(range(len(df_comb)), 40):
        got, want = df_comb.iloc[i], df_stacked.iloc[i]
        for j in df_comb.columns:
            assert eq(got[j], want[j])
示例#21
0
### Featurization based on dihedral angles for the protein folding trajectories
### Required packages: mdtraj, msmbuilder, glob
### @Chuankai Zhao, [email protected]

import mdtraj as md
import glob
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.utils import verbosedump, verboseload

# Locations of the MD trajectories (glob pattern) and of the topology file.
trajaddress = "/home/amoffet2/msm_network_project/folding/lindorff-larsen_2011_trajs/protein_g-350K/DESRES-Trajectory_NuG2-*-protein/NuG2-*-protein/*.dcd"
top = "/home/amoffet2/msm_network_project/folding/lindorff-larsen_2011_trajs/protein_g-350K/protein_g.pdb"

# Read every matching trajectory with mdtraj, keeping every 10th frame.
files = glob.glob(trajaddress)
traj_list = [md.load(path, top=top, stride=10) for path in files]

# Turn the trajectories into phi / psi / chi1 dihedral features.
model = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
features = model.transform(traj_list)

# Write the features to the output file.
pkl = "/home/czhao37/2-SAXS-Adaptive_Samping/6-protein-G/features/featurized_1mio.pkl"
verbosedump(features, pkl)
示例#22
0
		if traj.endswith(".dcd"):
			traj_files.append("%s/%s" %(traj_dir,traj))
	traj_files.sort()
	traj = md.load(traj_files, top = "/home/harrigan/compute/wetmsm/gpcr/des/system_mae_to_pdb/des_trajs/DESRES-Trajectory_pnas2011b-H-05-all/system.pdb", stride=10)
	traj = traj[0].join(traj[1:])
	traj.save("/home/enf/b2ar_analysis/H-05/%s" %("combined_traj_stride10.h5"))
else:
'''
#print("loading h5 traj")
#traj = md.load("combined_traj_stride10.h5")
'''
'''
if not (os.path.isfile("phi_psi_chi2_features_vd_stride10.h5")):
	print("featurizing")
	phi_psi_chi2 = DihedralFeaturizer(types=['phi','psi','chi2'])
	features = phi_psi_chi2.transform(traj_list = traj)
	print("finished featurizing")
	verbosedump(features, "phi_psi_chi2_features_vd_stride10.h5")
else:
	print("loading existing features")
	features = verboseload("phi_psi_chi2_features_vd_stride10.h5")
	features = [np.concatenate(features)]

if not (os.path.isfile("reduced_phi_psi_chi_stride10.h5")):
	print("Fitting tICA model")
	tica_model = tICA(n_components=4)
	fitted_model = tica_model.fit(features)
	reduced_data = fitted_model.transform(features)
	verbosedump(reduced_data, "reduced_phi_psi_chi_stride10.h5")
	print(tica_model.summarize())
else:
示例#23
0
import mdtraj as md
import pandas as pd
from msmbuilder.msm import MarkovStateModel
from msmbuilder.cluster import KMeans

# Script body: load trajectories, featurize them by dihedral angles, save the
# intermediate objects, then fit a 2-component tICA model on the features.
# NOTE(review): glob, DihedralFeaturizer, dump, load and tICA are used below
# but are not imported in the visible part of this script.
flist = glob.glob("../trajectory.xtc")

# Topology shared by every trajectory file.
top = md.load("../top.pdb")

trj_list = [md.load(i, top=top) for i in flist]
print("Found %d trajs" % len(trj_list))

# Raw dihedral angles (sincos=False); the featurizer itself is saved too.
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# From here on `f` and `feat` are rebound to a featurizer loaded from disk.
# NOTE(review): "./featurizer.pkl" is loaded and immediately dumped back to
# "featurizer.pkl"; confirm this round trip is intended.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
# Tabulate the feature descriptions, computed from the first trajectory.
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)

# Fit tICA on the features from the loaded featurizer and project them.
tica_feat = t.fit_transform(feat)
示例#24
0
		if traj.endswith(".dcd"):
			traj_files.append("%s/%s" %(traj_dir,traj))
	traj_files.sort()
	traj = md.load(traj_files, top = "/home/harrigan/compute/wetmsm/gpcr/des/system_mae_to_pdb/des_trajs/DESRES-Trajectory_pnas2011b-H-05-all/system.pdb", stride=10)
	traj = traj[0].join(traj[1:])
	traj.save("/home/enf/b2ar_analysis/H-05/%s" %("combined_traj_stride10.h5"))
else:
'''
#print("loading h5 traj")
#traj = md.load("combined_traj_stride10.h5")
'''
'''
if not (os.path.isfile("phi_psi_chi2_features_vd_stride10.h5")):
    print("featurizing")
    phi_psi_chi2 = DihedralFeaturizer(types=['phi', 'psi', 'chi2'])
    features = phi_psi_chi2.transform(traj_list=traj)
    print("finished featurizing")
    verbosedump(features, "phi_psi_chi2_features_vd_stride10.h5")
else:
    print("loading existing features")
    features = verboseload("phi_psi_chi2_features_vd_stride10.h5")
    features = [np.concatenate(features)]

if not (os.path.isfile("reduced_phi_psi_chi_stride10.h5")):
    print("Fitting tICA model")
    tica_model = tICA(n_components=4)
    fitted_model = tica_model.fit(features)
    reduced_data = fitted_model.transform(features)
    verbosedump(reduced_data, "reduced_phi_psi_chi_stride10.h5")
    print((tica_model.summarize()))
else: