Пример #1
0
def build_msm(clusterer_dir, lag_time):
	"""Fit a MarkovStateModel to a saved clusterer's labels and export it.

	The clusterer is loaded with ``verboseload``, a ``MarkovStateModel`` with
	the given lag time is fitted to ``clusterer.labels_``, the fitted model is
	saved with ``verbosedump`` and its transition matrix is written as a
	semicolon-separated table. All output locations are hard-coded under
	``/scratch/users/enf/b2ar_analysis``.

	Table layout: the first line is a header of ``;<label>`` cells; every
	following line is ``<label>`` followed by one ``;<probability>`` cell per
	state. Probabilities not greater than 1e-6 are written as ``0``, the
	others with ``%f``.

	Parameters
	----------
	clusterer_dir : path handed to ``verboseload``; the loaded object must
		expose ``cluster_centers_`` and ``labels_``.
	lag_time : lag time passed to ``MarkovStateModel``; it is also formatted
		with ``%d`` into the output file names.
	"""
	clusterer = verboseload(clusterer_dir)
	n_clusters = np.shape(clusterer.cluster_centers_)[0]
	labels = clusterer.labels_
	msm_modeler = MarkovStateModel(lag_time=lag_time)
	print("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, lag_time))
	msm_modeler.fit_transform(labels)
	verbosedump(msm_modeler, "/scratch/users/enf/b2ar_analysis/msm_model_%d_clusters_t%d" %(n_clusters, lag_time))
	print("fitted msm to trajectories with %d states" %(msm_modeler.n_states_))

	transmat = msm_modeler.transmat_
	mapping = msm_modeler.mapping_
	n_states = msm_modeler.n_states_

	# NOTE(review): rows/columns are labelled with mapping[i]. construct_graph
	# in this file translates matrix indices through the *inverse* of
	# mapping_ instead -- confirm which direction is intended here.
	edgelist_path = "/scratch/users/enf/b2ar_analysis/msm_%d_clusters_t%d_edgelist.csv" %(n_clusters, lag_time)
	# Text mode: every cell is written as str, which a "wb" handle rejects on
	# Python 3. The with-block also closes the file if a write fails.
	with open(edgelist_path, "w") as edges:
		if n_states > 0:
			edges.write("".join(";%d" % mapping[j] for j in range(n_states)) + "\n")
		for i in range(n_states):
			cells = ["%d" % mapping[i]]
			for j in range(n_states):
				prob = transmat[i][j]
				cells.append("%f" % prob if prob > 0.000001 else "0")
			edges.write(";".join(cells) + "\n")
Пример #2
0
def build_msm(clusterer_dir, lag_time):
	"""Fit a MarkovStateModel to a stored clusterer's labels and save the model.

	The clusterer is read with ``verboseload``; a ``MarkovStateModel`` with
	the requested ``lag_time`` is fitted to its ``labels_`` and written with
	``verbosedump`` to a hard-coded path whose name encodes the number of
	clusters and the lag time.
	"""
	fitted_clusterer = verboseload(clusterer_dir)
	n_clusters = np.shape(fitted_clusterer.cluster_centers_)[0]
	state_sequences = fitted_clusterer.labels_
	# Both the log line and the output name are formatted from the same pair.
	name_args = (n_clusters, lag_time)
	model = MarkovStateModel(lag_time=lag_time)
	print("fitting msm to trajectories with %d clusters and lag_time %d" % name_args)
	model.fit_transform(state_sequences)
	model_path = "/scratch/users/enf/b2ar_analysis/msm_model_%d_clusters_t%d" % name_args
	verbosedump(model, model_path)
	print("fitted msm to trajectories with %d states" % model.n_states_)
	'''
Пример #3
0
def build_msm(clusterer_dir,
              lag_time,
              msm_model_dir,
              prior_counts=0.0,
              ergodic_cutoff='on'):
    """Fit a MarkovStateModel to a stored clusterer's labels and return it.

    Parameters
    ----------
    clusterer_dir : path handed to ``verboseload``; the loaded object must
        expose ``cluster_centers_`` and ``labels_``.
    lag_time : lag time forwarded to ``MarkovStateModel``.
    msm_model_dir : destination handed to ``verbosedump`` for the fitted model.
    prior_counts, ergodic_cutoff : forwarded unchanged to ``MarkovStateModel``.

    Returns
    -------
    The fitted ``MarkovStateModel`` instance.
    """
    fitted_clusterer = verboseload(clusterer_dir)
    n_clusters = np.shape(fitted_clusterer.cluster_centers_)[0]
    state_sequences = fitted_clusterer.labels_
    model_options = dict(lag_time=lag_time,
                         prior_counts=prior_counts,
                         ergodic_cutoff=ergodic_cutoff)
    model = MarkovStateModel(**model_options)
    start_message = ("fitting msm to trajectories with %d clusters and lag_time %d" %
                     (n_clusters, lag_time))
    print(start_message)
    model.fit_transform(state_sequences)
    print(model)
    verbosedump(model, msm_model_dir)
    done_message = "fitted msm to trajectories with %d states" % model.n_states_
    print(done_message)
    return model
    '''
Пример #4
0
def build_msm(clusterer_dir, lag_time):
    """Fit a MarkovStateModel to a saved clusterer's labels and export it.

    The clusterer is loaded with ``verboseload``, a ``MarkovStateModel`` with
    the given lag time is fitted to ``clusterer.labels_``, the fitted model is
    saved with ``verbosedump`` and its transition matrix is written as a
    semicolon-separated table. All output locations are hard-coded under
    ``/scratch/users/enf/b2ar_analysis``.

    Table layout: the first line is a header of ``;<label>`` cells; every
    following line is ``<label>`` followed by one ``;<probability>`` cell per
    state. Probabilities not greater than 1e-6 are written as ``0``, the
    others with ``%f``.

    Parameters
    ----------
    clusterer_dir : path handed to ``verboseload``; the loaded object must
        expose ``cluster_centers_`` and ``labels_``.
    lag_time : lag time passed to ``MarkovStateModel``; it is also formatted
        with ``%d`` into the output file names.
    """
    clusterer = verboseload(clusterer_dir)
    n_clusters = np.shape(clusterer.cluster_centers_)[0]
    labels = clusterer.labels_
    msm_modeler = MarkovStateModel(lag_time=lag_time)
    print(("fitting msm to trajectories with %d clusters and lag_time %d" %
           (n_clusters, lag_time)))
    msm_modeler.fit_transform(labels)
    verbosedump(
        msm_modeler,
        "/scratch/users/enf/b2ar_analysis/msm_model_%d_clusters_t%d" %
        (n_clusters, lag_time))
    print(("fitted msm to trajectories with %d states" %
           (msm_modeler.n_states_)))

    transmat = msm_modeler.transmat_
    mapping = msm_modeler.mapping_
    n_states = msm_modeler.n_states_

    # NOTE(review): rows/columns are labelled with mapping[i]. construct_graph
    # in this file translates matrix indices through the *inverse* of
    # mapping_ instead -- confirm which direction is intended here.
    edgelist_path = (
        "/scratch/users/enf/b2ar_analysis/msm_%d_clusters_t%d_edgelist.csv" %
        (n_clusters, lag_time))
    # Text mode: every cell is written as str, which a "wb" handle rejects on
    # Python 3. The with-block also closes the file if a write fails.
    with open(edgelist_path, "w") as edges:
        if n_states > 0:
            header = "".join(";%d" % mapping[j] for j in range(n_states))
            edges.write(header + "\n")
        for i in range(n_states):
            cells = ["%d" % mapping[i]]
            for j in range(n_states):
                prob = transmat[i][j]
                cells.append("%f" % prob if prob > 0.000001 else "0")
            edges.write(";".join(cells) + "\n")
Пример #5
0
# Fixed seed so the clustering step below is repeatable between runs.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract chi1 dihedral features (the featurizer is configured with
# types=['chi1'] only).
featurizer = DihedralFeaturizer(types=['chi1'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=12, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
assignments = msm.fit_transform(clustered_trajs)

# Flatten the per-trajectory state assignments and dihedral features into
# single arrays so frames can be selected by state.
a = np.concatenate(assignments, axis=0)
d = np.concatenate(diheds, axis=0)

# Plot Stacked Distributions of the sine of each Chi1 angle
# within an arbitrary set of states {2, 5, 0}
# [:, ::2] keeps every second feature column -- assumes the featurizer
# interleaves sine and cosine columns with sine first; TODO confirm.
path_data = [d[a == i][:, ::2] for i in [2, 5, 0]]
msme.plot_stackdist(path_data)
Пример #6
0
# Pipeline script: load feature arrays, reduce them with tICA, cluster the
# projection with KMeans and fit an MSM to the cluster labels. Each fitted
# object is saved to a file whose name starts with the `sys` prefix.
lag_time = 10  # tICA lag time
n_clusters = 2000
# NOTE: `sys` is a file-name prefix here; it shadows the stdlib module name
# if that module is imported in this script.
sys = 'Src-'
n_timescales = 10
lagTime = 50  # 5ns (MSM lag time)

# NOTE(review): n_components is used below but is not assigned anywhere in
# this snippet -- it must be defined before this point.

# loading the data
dataset = []
import glob
for file in glob.glob('highRMSF_phi_psi/*.npy'):
    a = np.array(np.load(file))
    dataset.append(a)

# building tica
tica = tICA(n_components=n_components, lag_time=lag_time)
tica.fit(dataset)
tica_traj = tica.transform(dataset)
# Context manager so the pickle file is flushed and closed deterministically.
with open(sys + '_tICs_' + str(n_components) + '.pkl', 'wb') as tica_file:
    pickle.dump(tica, tica_file)

# clustering
states = msmbuilder.cluster.KMeans(n_clusters=n_clusters)
states.fit(tica_traj)
io.dump(
    states, sys + '_tICs_' + str(n_components) + 'nCluster_' +
    str(n_clusters) + '.pkl')

# making MSM
msm = MarkovStateModel(lag_time=lagTime, n_timescales=n_timescales)
# Fit on the labels of the clusterer fitted above; the previous `cl` name is
# never assigned in this script.
msm.fit_transform(states.labels_)
io.dump(msm, 'MSM' + sys)
Пример #7
0
def construct_graph(msm_modeler_dir, clusterer_dir, n_clusters, tica_lag_time, msm_lag_time, graph_file, inactive = None, active = None, pnas_clusters_averages = None, tica_clusters_averages = None, docking=None, macrostate = None):
	"""Build a directed graph from an MSM transition matrix and write it as GraphML.

	Every transition with probability greater than zero becomes an edge
	between the original cluster labels (matrix indices are translated
	through the inverse of ``msm_modeler.mapping_``). Probabilities below
	0.001 are raised to 0.001 before being stored as the edge attributes
	``prob``, ``inverse_prob`` (1 / prob) and ``weight``. Optional per-cluster
	score tables are attached as node attributes for clusters that are nodes
	of the graph.

	Parameters
	----------
	msm_modeler_dir : path of the saved MSM. If it does not exist, a new
		``MarkovStateModel`` is fitted to the clusterer labels and dumped
		there; otherwise the saved model is loaded with ``verboseload``.
	clusterer_dir : path of the clusterer, loaded with ``verboseload``.
	n_clusters : ignored; it is overwritten with the number of rows of
		``clusterer.cluster_centers_``.
	tica_lag_time : not used by this function.
	msm_lag_time : lag time used when a new MSM has to be fitted.
	graph_file : destination handed to ``nx.write_graphml``.
	inactive, active, pnas_clusters_averages, tica_clusters_averages, docking :
		optional inputs for ``convert_csv_to_map_nocombine``. The result is
		used as a mapping from a cluster name (which contains the cluster
		id) to an indexable sequence of scores.
	macrostate : optional path of a ``verboseload``-able object exposing
		``microstate_mapping_``.

	Returns
	-------
	None. The graph is only written to ``graph_file``.
	"""
	clusterer = verboseload(clusterer_dir)
	n_clusters = np.shape(clusterer.cluster_centers_)[0]
	labels = clusterer.labels_
	if not os.path.exists(msm_modeler_dir):
		msm_modeler = MarkovStateModel(lag_time=msm_lag_time, n_timescales = 5, sliding_window = True, verbose = True)
		print("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, msm_lag_time))
		msm_modeler.fit_transform(labels)
		verbosedump(msm_modeler, msm_modeler_dir)
	else:
		msm_modeler = verboseload(msm_modeler_dir)
	graph = nx.DiGraph()
	mapping = msm_modeler.mapping_
	inv_mapping = {v: k for k, v in mapping.items()}
	transmat = msm_modeler.transmat_
	n_states = msm_modeler.n_states_

	for i in range(n_states):
		for j in range(n_states):
			prob = transmat[i][j]
			if prob > 0.0:
				# Floor tiny probabilities so inverse_prob stays bounded.
				prob = max(float(prob), 0.001)
				graph.add_edge(inv_mapping[i], inv_mapping[j], prob = prob, inverse_prob = 1.0 / prob, weight = prob)

	print(graph.number_of_nodes())

	# Membership is tested with `in graph` (a dict lookup) rather than
	# `in graph.nodes()`, which is a list scan on older networkx releases.
	# NOTE(review): `graph.node[...]` was removed in networkx 2.4; switch to
	# `graph.nodes[...]` when this code is moved to a newer networkx.

	if inactive is not None:
		scores = convert_csv_to_map_nocombine(inactive)
		for cluster, values in scores.items():
			# The id is whatever follows the first seven characters of the name.
			cluster_id = int(cluster[7:])
			if cluster_id in graph:
				graph.node[cluster_id]["inactive_pnas"] = values[0]

	if active is not None:
		scores = convert_csv_to_map_nocombine(active)
		for cluster, values in scores.items():
			# The id is the first run of digits in the name.
			cluster_id = int(re.search(r'\d+',cluster).group())
			if cluster_id in graph:
				graph.node[cluster_id]["active_pnas"] = values[0]

	if pnas_clusters_averages is not None:
		scores = convert_csv_to_map_nocombine(pnas_clusters_averages)
		for cluster, values in scores.items():
			cluster_id = int(re.search(r'\d+',cluster).group())
			if cluster_id in graph:
				graph.node[cluster_id]["tm6_tm3_dist"] = values[0]
				graph.node[cluster_id]["rmsd_npxxy_active"] = values[2]
				graph.node[cluster_id]["rmsd_connector_active"] = values[4]

	if tica_clusters_averages is not None:
		scores = convert_csv_to_map_nocombine(tica_clusters_averages)
		for cluster, values in scores.items():
			cluster_id = int(re.search(r'\d+',cluster).group())
			if cluster_id in graph:
				for i in range(len(values)):
					graph.node[cluster_id]["tIC%d" %(i+1)] = values[i]

	if docking is not None:
		scores = convert_csv_to_map_nocombine(docking)
		for cluster, values in scores.items():
			cluster_id = int(cluster[7:])
			if cluster_id in graph:
				graph.node[cluster_id]["docking"] = values[0]

	if macrostate is not None:
		macromodel = verboseload(macrostate)
		for cluster_id in range(n_clusters):
			if cluster_id in graph:
				microstate_cluster_id = mapping[cluster_id]
				macrostate_cluster_id = macromodel.microstate_mapping_[microstate_cluster_id]
				graph.node[cluster_id]["macrostate"] = int(macrostate_cluster_id)

	nx.write_graphml(graph, graph_file)
Пример #8
0
                                      linkage='ward',
                                      metric='rmsd',
                                      landmark_strategy='stride',
                                      random_state=None,
                                      max_landmarks=None,
                                      ward_predictor='ward')
    ctrajs = clusterer.fit_transform(trajs)

    lags = (np.arange(1, 50, 1) / to_ns).astype(int)
    n_timescales = 50
    timescales = np.zeros((lags.shape[0], n_timescales))
    eigenvalues = np.zeros((lags.shape[0], n_timescales))

    for idx, lag in enumerate(lags):
        msm = MarkovStateModel(lag_time=lag, n_timescales=n_timescales)
        msm.fit_transform(ctrajs)
        timescales[idx] = msm.timescales_
        eigenvalues[idx] = msm.eigenvalues_[1:]

    for idx in range(n_timescales):
        plt.plot(lags * to_ns, timescales.T[idx])
    plt.savefig('figures/rmsd_timescales.png')
    plt.ylim((0, int(np.max(timescales.T[1]))))
    plt.savefig('figures/rmsd_timescales-detail.png')
    plt.clf()

    for idx in range(n_timescales):
        plt.plot(lags * to_ns, eigenvalues.T[idx])
    plt.savefig('figures/rmsd_eigenvalues.png')
    # Make Pipeline
Пример #9
0
def construct_graph(msm_modeler_dir,
                    clusterer_dir,
                    n_clusters,
                    tica_lag_time=5,
                    msm_lag_time=10,
                    graph_file="~/graph_file.graphml",
                    msm_object=None,
                    clusterer_object=None,
                    inactive=None,
                    active=None,
                    pnas_clusters_averages=None,
                    tica_clusters_averages=None,
                    docking=None,
                    macrostate=None,
                    cluster_attributes=None,
                    msm_attributes=None,
                    min_prob=1e-4):
    """
    Construct a .graphml graph based on an MSM and attributes of clusters and/or MSM states.
    Saves .graphml graph to disk and returns it as well.

    *needs networkx python package to use*

    Every transition with probability of at least ``min_prob`` (and greater
    than zero) becomes an edge between the original cluster labels, carrying
    the attributes ``prob`` and ``inverse_prob`` (1 / prob).

    Parameters
    ----------
    msm_modeler_dir: location on disk of verboseload loadable msm object. If
      the path exists the model is loaded from it; otherwise a model is fitted
      to the clusterer labels and dumped there.
    clusterer_dir: location on disk of verboseload loadable clusterer object
    n_clusters: number of clusters. Ignored: it is overwritten with the number
      of rows of clusterer.cluster_centers_.
    tica_lag_time: tica lag time (not used by this function)
    msm_lag_time: msm lag time, used when a new MSM has to be created
    graph_file: location on disk for saving graphml file; a leading "~" in a
      string path is expanded to the user's home directory
    msm_object: msm object to fit instead of creating a new one.
      NOTE(review): it is only used (and fitted again) when msm_modeler_dir
      does not exist; confirm whether it should take precedence over the file.
    clusterer_object: pass clusterer object directly instead of loading from disk
    inactive, active, pnas_clusters_averages, tica_clusters_averages, docking:
      optional inputs for convert_csv_to_map_nocombine; the result is used as
      a mapping from a cluster name containing the cluster id to an indexable
      sequence of scores, which are stored as node attributes.
    macrostate: optional location of a verboseload loadable object exposing
      microstate_mapping_
    cluster_attributes: dictionary that maps names of attributes to lists of size n_clusters
      where each entry in the list is the value of that attribute for that cluster. for example,
      if n_clusters=3, an example cluster_attributes dict might be:
        cluster_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
    msm_attributes: dictionary that maps names of attributes to lists of size n_msm_states
      where each entry in the list is the value of that attribute for that msm state. for example,
      if n_msm_states=3, an example msm_attributes dict might be:
        msm_attributes = {'tyr75-his319_dist': [7.0, 6.0, 8.0], 'phe289-chi2': [90.0, 93.0, 123.2]}
    min_prob: transitions with a probability below this value get no edge

    Returns
    -------
    The networkx DiGraph that was written to graph_file.
    """

    if clusterer_object is None:
        clusterer = verboseload(clusterer_dir)
    else:
        clusterer = clusterer_object
    n_clusters = np.shape(clusterer.cluster_centers_)[0]

    labels = clusterer.labels_

    if not os.path.exists(msm_modeler_dir):
        if msm_object is not None:
            msm_modeler = msm_object
        else:
            msm_modeler = MarkovStateModel(lag_time=msm_lag_time,
                                           n_timescales=5,
                                           sliding_window=True,
                                           verbose=True)
        print(("fitting msm to trajectories with %d clusters and lag_time %d" %
               (n_clusters, msm_lag_time)))
        msm_modeler.fit_transform(labels)
        verbosedump(msm_modeler, msm_modeler_dir)
    else:
        msm_modeler = verboseload(msm_modeler_dir)
    graph = nx.DiGraph()
    mapping = msm_modeler.mapping_
    inv_mapping = {v: k for k, v in mapping.items()}
    transmat = msm_modeler.transmat_
    n_states = msm_modeler.n_states_

    for i in range(n_states):
        for j in range(n_states):
            prob = float(transmat[i][j])
            # The explicit zero check keeps 1.0 / prob safe when the caller
            # passes min_prob <= 0.
            if prob < min_prob or prob <= 0.0:
                continue
            graph.add_edge(inv_mapping[i],
                           inv_mapping[j],
                           prob=prob,
                           inverse_prob=1.0 / prob)

    print("Number of nodes in graph:")
    print((graph.number_of_nodes()))

    # Membership is tested with `in graph` (a dict lookup) rather than
    # `in graph.nodes()`, which is a list scan on older networkx releases.
    # NOTE(review): `graph.node[...]` was removed in networkx 2.4; switch to
    # `graph.nodes[...]` when this code is moved to a newer networkx.

    if inactive is not None:
        scores = convert_csv_to_map_nocombine(inactive)
        for cluster, values in scores.items():
            # The id is whatever follows the first seven characters.
            cluster_id = int(cluster[7:])
            if cluster_id in graph:
                graph.node[cluster_id]["inactive_pnas"] = values[0]

    if active is not None:
        scores = convert_csv_to_map_nocombine(active)
        for cluster, values in scores.items():
            # The id is the first run of digits in the name.
            cluster_id = int(re.search(r'\d+', cluster).group())
            if cluster_id in graph:
                graph.node[cluster_id]["active_pnas"] = values[0]

    if pnas_clusters_averages is not None:
        scores = convert_csv_to_map_nocombine(pnas_clusters_averages)
        for cluster, values in scores.items():
            cluster_id = int(re.search(r'\d+', cluster).group())
            if cluster_id in graph:
                graph.node[cluster_id]["tm6_tm3_dist"] = values[0]
                graph.node[cluster_id]["rmsd_npxxy_active"] = values[2]
                graph.node[cluster_id]["rmsd_connector_active"] = values[4]

    if tica_clusters_averages is not None:
        scores = convert_csv_to_map_nocombine(tica_clusters_averages)
        for cluster, values in scores.items():
            cluster_id = int(re.search(r'\d+', cluster).group())
            if cluster_id in graph:
                for i in range(len(values)):
                    graph.node[cluster_id]["tIC%d" % (i + 1)] = values[i]

    if docking is not None:
        scores = convert_csv_to_map_nocombine(docking)
        for cluster, values in scores.items():
            cluster_id = int(cluster[7:])
            if cluster_id in graph:
                graph.node[cluster_id]["docking"] = values[0]

    if macrostate is not None:
        macromodel = verboseload(macrostate)
        for cluster_id in range(n_clusters):
            if cluster_id in graph:
                microstate_cluster_id = mapping[cluster_id]
                macrostate_cluster_id = macromodel.microstate_mapping_[
                    microstate_cluster_id]
                graph.node[cluster_id]["macrostate"] = int(
                    macrostate_cluster_id)

    # A state whose transitions all fall below min_prob never becomes a node,
    # so the two attribute passes skip clusters missing from the graph, like
    # the sections above do.
    if cluster_attributes is not None:
        for attribute, values in cluster_attributes.items():
            for cluster_id in mapping:
                if cluster_id in graph:
                    graph.node[cluster_id][attribute] = float(
                        values[cluster_id])

    if msm_attributes is not None:
        for attribute, values in msm_attributes.items():
            for cluster_id in mapping:
                if cluster_id in graph:
                    # msm_attributes is indexed by MSM state, not by cluster.
                    graph.node[cluster_id][attribute] = float(
                        values[mapping[cluster_id]])

    # write_graphml does not expand "~", which the default graph_file relies
    # on. Non-string targets (e.g. open file handles) are passed through.
    if isinstance(graph_file, str):
        graph_file = os.path.expanduser(graph_file)
    nx.write_graphml(graph, graph_file)
    return (graph)
import matplotlib
matplotlib.use('Agg')
from msmbuilder.msm import MarkovStateModel
from msmbuilder.msm import implied_timescales
import pylab as plt
import matplotlib as mpl

# Fit one MSM per lag time to the saved clusterer's labels and store the
# resulting timescales (one column per lag time) together with the lag times.
file = 'dataset_nark.best_nonredu.pkl'
name = file[:-4]
# Pickles are binary data: open with 'rb' (text mode fails on Python 3) and
# close the handle as soon as the object is loaded.
with open(name + "-GA-mbkm_mdl.pkl", 'rb') as model_file:
    cl = pickle.load(model_file)
n_timescales = 5

lag_times = range(5, 225, 5)

ts = np.zeros([n_timescales, len(lag_times)])
ns_lt = np.ndarray.tolist(np.array(lag_times))
clL = cl.labels_

for index, i in enumerate(lag_times):
    msm = MarkovStateModel(lag_time=i, n_timescales=n_timescales)
    msm.fit_transform(clL)
    print(msm.timescales_)
    ts[:, index] = msm.timescales_

np.save('nark_best_nonredu_ts10_cl400_ns_lt', ns_lt)
np.save('nark_best_nonredu_ts10_cl400_ts', ts)
Пример #11
0
# Fit one MSM per lag time to the saved clustering, keep the timescales
# (one column per lag time) and dump every fitted model to 'MSM<lag>.pkl'.
font = {'family':'Times New Roman', 'size': 12}
plt.rc('font', **font)
# Close the pickle handle as soon as the clustering is loaded.
with open('clustering.pkl','rb') as clustering_file:
    cl = pickle.load(clustering_file)
n_timescales=10
stepS = 1.2
lag_times=[1, 2, 3,4, 5,6, 7,8, 9,10,11,12,13,14,15,16,17]
l = len(lag_times)

# Rows follow n_timescales so the array always matches msm.timescales_.
ts=np.zeros([n_timescales,l])

# Lag times scaled by stepS, as a plain list.
ns_lt=np.ndarray.tolist(stepS*np.array(lag_times))

for index, i in enumerate(lag_times):
    msm=MarkovStateModel(lag_time=i, n_timescales=n_timescales)
    msm.fit_transform(cl.labels_)
    ts[:,index]=msm.timescales_
    io.dump(msm,'MSM'+str(i)+'.pkl')

"""
for i in lag_times:
    msm = io.load('MSM'+str(i)+'.pkl')
    ts[:,index]=msm.timescales_
    index=index+1
"""

fig, ax = plt.subplots(1,1)

for i in range(10):
  j=i+1
Пример #12
0
	a = trjN.split('/')[-1]
	topN = a.split('_md')[0]
	rawtop = 'rawTrj/'+roundN+'-rwTop/'+topN+'.prmtop'
	return rawtop
	
def findRawtrj(trjN):
	"""Return the raw-trajectory path that corresponds to ``trjN``.

	``trjN`` is split on '/'. The first three characters of its second
	component name the round and its last component is the file name; the
	result is 'rawTrj/<round>-rwTrj/<file name>'.

	Raises IndexError when ``trjN`` contains no '/'.
	"""
	pieces = trjN.split('/')
	round_tag = pieces[1][:3]
	file_name = pieces[-1]
	return ''.join(['rawTrj/', round_tag, '-rwTrj/', file_name])
	
# Load the clustering, fit an MSM to its labels and, for every starting
# state chosen by ad.findStarting, draw a sample and resolve the matching
# topology / raw-trajectory paths.
# Close the pickle handle as soon as the clustering is loaded.
with open(cl,'rb') as cluster_file:
	cluster = pickle.load(cluster_file)
clL = cluster.labels_

msm = MarkovStateModel(lag_time=10,n_timescales=10)
msm.fit_transform(clL)

trjs = clL
N = n_samples
inits = ad.findStarting([trjs], N, method=method)

# Sorted so the list positions are stable between runs.
T = sorted(glob.glob(Trjs))

count = 0
for init in inits:
	# A fresh sample is drawn for every starting state, as before.
	structure = msm.draw_samples(clL, 1)[init]
	# Parenthesised form prints the same on Python 2 and parses on Python 3.
	print(structure)
	top = findTop(T[structure[0][0]])
	rawTrj = findRawtrj(T[structure[0][0]])
Пример #13
0
# the address should be the address of trajectories corresponding to dataset
# findStarting(trjs, N, method='random')

import adaptivsamplingMSM as ad
from msmbuilder.msm import MarkovStateModel

# Pick starting states with ad.findStarting, draw one sample per MSM state
# and record the trajectory file and frame of every starting state in
# ClsInf.txt as JSON.
# NOTE(review): in this snippet `json` is only imported further down, after
# its use here -- make sure it is imported before this point.
with open('clustering.pkl','rb') as cluster_file:
	cluster = pickle.load(cluster_file)
trjs = cluster.labels_
N = n_samples
# Sorted so the list positions are stable between runs.
T = sorted(glob.glob('rawTrj/MD1-rwTrj/*.mdcrd'))
inits = ad.findStarting([trjs], N, method='leastPop')
msm=MarkovStateModel(lag_time=1, n_timescales=10)
msm.fit_transform(cluster.labels_)
OPF = []
structure = msm.draw_samples(trjs, 1)
for i in range(n_samples):
	try:
		init = structure[msm.mapping_[inits[i]]]
	except KeyError:
		# The starting cluster has no entry in msm.mapping_. Skip it instead
		# of falling through with an undefined (first iteration) or stale
		# (later iterations) `init`.
		print("no MSM state for starting cluster %s; skipping" % (inits[i],))
		continue
	traj = T[init[0][0]]
	# int() keeps the value JSON-serializable if it is a NumPy integer.
	frame = int(init[0][1])
	OPF.append({'traj':traj, 'frame':frame})
# Context manager so the JSON file is flushed and closed.
with open("ClsInf.txt",'w') as out_file:
	json.dump(OPF, out_file)

### Step 5: making the CPPtraj inputs

import json