Пример #1
0
def test_kcenters_4():
    # Test for predict() using a non-euclidean distance. Because of the
    # way the code is structured, this takes a different path.
    model = KCenters(n_clusters=10, metric='cityblock')
    # Draw the data from a seeded generator so a failure can be replayed
    # with exactly the same points.
    data = np.random.RandomState(0).randn(100, 2)
    labels1 = model.fit_predict([data])
    labels2 = model.predict([data])

    # Labels produced while fitting must match predict() on the same data.
    eq(labels1[0], labels2[0])
    # Every point must be assigned to its closest center under the metric.
    all_pairs = scipy.spatial.distance.cdist(
        data, model.cluster_centers_, metric='cityblock')
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Пример #2
0
def test_kcenters_3():
    # Test for predict() using the default (euclidean) distance.
    m = KCenters(n_clusters=10)
    # Draw the data from a seeded generator so a failure can be replayed
    # with exactly the same points.
    data = np.random.RandomState(0).randn(100, 2)
    labels1 = m.fit_predict([data])
    labels2 = m.predict([data])

    # Labels produced while fitting must match predict() on the same data.
    eq(labels1[0], labels2[0])
    # Every point must be assigned to its closest center (cdist defaults to
    # the euclidean metric).
    all_pairs = scipy.spatial.distance.cdist(data, m.cluster_centers_)
    eq(labels2[0], np.argmin(all_pairs, axis=1))
Пример #3
0
def test_kcenters_1():
    # Make sure the fitted attributes have the expected types and shapes.
    # A seeded generator keeps the input identical from run to run.
    rng = np.random.RandomState(0)
    m = KCenters(n_clusters=3)
    m.fit([rng.randn(23, 2), rng.randn(10, 2)])

    # One labels/distances entry per input sequence.
    assert isinstance(m.labels_, list)
    assert isinstance(m.distances_, list)
    assert len(m.labels_) == 2
    assert len(m.distances_) == 2
    eq(m.cluster_centers_.shape, (3, 2))
    eq(m.labels_[0].shape, (23,))
    eq(m.labels_[1].shape, (10,))
    eq(m.distances_[0].shape, (23,))
    eq(m.distances_[1].shape, (10,))

    # Refit on a single sequence and check the result is NaN-free.
    eq(m.fit_predict([rng.randn(10, 2)])[0].shape, (10,))
    assert np.all(np.logical_not(np.isnan(m.distances_[0])))
Пример #4
0
# Sequences of coordinates of the ligand aromatic ring and Asp113,
# gathered from every simulation into a single list.
sequences_all = []
for this_sim in simulations:
    this_seq = util.featurize_RawPos(inds, this_sim)
    sequences_all += this_seq

# Time step as reported by the project helper for the configured stride.
time_step = util.calc_time_step(times_path, stride=LOAD_STRIDE)

# Cluster the sequences into 10 states and keep the per-sequence
# assignments together with the cluster centers.
clustering = KCenters(n_clusters=10)
assignments = clustering.fit_predict(sequences_all)
centers = clustering.cluster_centers_

# Fit a Markov state model on the assignments with lag_time=180.
msm = MarkovStateModel(lag_time=180, verbose=True)
msm = msm.fit(assignments)
countsmat = msm.countsmat_
transmat = msm.transmat_

# Write the count and transition matrices out as text.
np.savetxt(
    '/home/shenglan/TryMSMbuilder/output/countsmat.out',
    countsmat,
    fmt='%8.4g',
)
np.savetxt(
    '/home/shenglan/TryMSMbuilder/output/transmat.out',
    transmat,
    fmt='%10.4g',
)

# Pickle `distances` (defined earlier in the script) into a file whose name
# carries the load stride. Every dump below goes through a `with` block so
# the output file is flushed and closed as soon as the dump finishes.
dist_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/dist_to_binding'
             + '_s' + str(LOAD_STRIDE) + '.out')
with open(dist_path, 'wb') as out_handle:
    pickle.dump(distances, out_handle)

# get N positions
sequences_all = []
for this_sim in simulations:
    # Each simulation is passed to the featurizer wrapped in its own list.
    this_seq = util.featurize_RawPos(inds_N, [this_sim])
    sequences_all.extend(this_seq)
seq_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/sequences'
            + '_s' + str(LOAD_STRIDE) + '.out')
with open(seq_path, 'wb') as out_handle:
    pickle.dump(sequences_all, out_handle)

# Cluster the sequences into N_CLUSTER states.
clustering = KCenters(n_clusters=N_CLUSTER)
geo_assign = clustering.fit_predict(sequences_all)
centers = clustering.cluster_centers_

geo_assign_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_geoassign_c'
                   + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(geo_assign_path, 'wb') as out_handle:
    pickle.dump(geo_assign, out_handle)

# Fit the microstate MSM on the cluster assignments.
micro_msm = MarkovStateModel(lag_time=1,
                             reversible_type='transpose',
                             ergodic_cutoff='off',
                             verbose=True).fit(geo_assign)

msm_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_msm_c'
            + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(msm_path, 'wb') as out_handle:
    pickle.dump(micro_msm, out_handle)
# Read the names listed in "ticproj_TS2" (one per line) and, for each name,
# load its projection file and keep the first `num_tics_for_clustering`
# columns.
TS2_ticproj_list_array = []
tica_TS2_sequences = []
# Open the listing in a `with` block so the handle is closed after the loop.
with open("ticproj_TS2") as ticproj_listing:
    for line in ticproj_listing:
        entry_name = line.strip()
        TS2_ticproj_list_array.append(entry_name)
        temp1 = numpy.loadtxt("%s/TS2_project_onto_GS_tics/%s_ticproj.txt" %
                              (outputdir, entry_name))
        temp1 = temp1[:, 0:num_tics_for_clustering]
        tica_TS2_sequences.append(temp1)

tmp_counter = 0

kcenters = KCenters(n_clusters=nMicro)

# Author's note: clustering is fit on the ground-state tICA sequences
# (`tica_sequences`, defined elsewhere), not on the TS2 projections above.
kcenters_sequences = kcenters.fit_predict(tica_sequences)

# Call form with a single argument prints the same text on Python 2 and 3.
print("begin to plot the microstate implied timescale into the objective dir")

# Implied-timescale scan settings; adjust as needed.
lag_times = range(10, 100, 10)
n_timescales = 5

msm_timescales = implied_timescales(kcenters_sequences,
                                    lag_times,
                                    n_timescales=n_timescales,
                                    msm=MarkovStateModel(
                                        verbose=True,
                                        reversible_type='transpose'))
        distances.append(this_lig)

# Pickle `distances` into a file whose name carries the load stride. Each
# dump in this section uses a `with` block so the output file is closed
# (and therefore fully written) right after the dump.
dist_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/dist_to_binding'
             + '_s' + str(LOAD_STRIDE) + '.out')
with open(dist_path, 'wb') as out_handle:
    pickle.dump(distances, out_handle)

# get N positions
sequences_all = []
for this_sim in simulations:
    # Each simulation is passed to the featurizer wrapped in its own list.
    this_seq = util.featurize_RawPos(inds_N, [this_sim])
    sequences_all.extend(this_seq)
seq_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/sequences'
            + '_s' + str(LOAD_STRIDE) + '.out')
with open(seq_path, 'wb') as out_handle:
    pickle.dump(sequences_all, out_handle)

# Cluster the sequences into N_CLUSTER states.
clustering = KCenters(n_clusters=N_CLUSTER)
geo_assign = clustering.fit_predict(sequences_all)
centers = clustering.cluster_centers_

geo_assign_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_geoassign_c'
                   + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(geo_assign_path, 'wb') as out_handle:
    pickle.dump(geo_assign, out_handle)

# Fit the microstate MSM on the cluster assignments.
micro_msm = MarkovStateModel(lag_time=1,
                             reversible_type='transpose',
                             ergodic_cutoff='off',
                             verbose=True).fit(geo_assign)

msm_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_msm_c'
            + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(msm_path, 'wb') as out_handle:
    pickle.dump(micro_msm, out_handle)

# map assignments
Пример #8
0
    inds_N.append(iis)
# Call form with a single argument prints the same text on Python 2 and 3.
print(inds_N)

# Sequences of coordinates of ligands: averaged positions over `inds_all`
# when `use_COM` is set, otherwise raw positions of `inds_N`.
sequences_all = []
for this_sim in simulations:
    if use_COM:
        this_seq = util.featurize_RawPos(inds_all, this_sim, average=True)
    else:
        this_seq = util.featurize_RawPos(inds_N, this_sim)
    sequences_all.extend(this_seq)

# Pickle dumps below use `with` so each output file is closed right away.
seqfile = '/home/shenglan/TryMSMbuilder/output/sequences' + '_s' + str(LOAD_STRIDE) + '.out'
with open(seqfile, 'wb') as out_handle:
    pickle.dump(sequences_all, out_handle)

KC_clustering = KCenters(n_clusters=N_CLUSTER)
KC_assignments = KC_clustering.fit_predict(sequences_all)
KC_centers = KC_clustering.cluster_centers_

# NOTE(review): the "KM" model is also built with KCenters, exactly like
# the "KC" one above. Presumably a different clusterer was intended here;
# confirm with the author before relying on the KM_* outputs.
KM_clustering = KCenters(n_clusters=N_CLUSTER)
KM_assignments = KM_clustering.fit_predict(sequences_all)
KM_centers = KM_clustering.cluster_centers_

# Cluster centers are written as text, assignments as pickles.
KC_output_file = ('/home/shenglan/TryMSMbuilder/output/KC_centers_c'
                  + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
KM_output_file = ('/home/shenglan/TryMSMbuilder/output/KM_centers_c'
                  + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
np.savetxt(KC_output_file, KC_centers, fmt='%10.4g')
np.savetxt(KM_output_file, KM_centers, fmt='%10.4g')

KC_assign_file = ('/home/shenglan/TryMSMbuilder/output/KC_assign_'
                  + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
KM_assign_file = ('/home/shenglan/TryMSMbuilder/output/KM_assign_'
                  + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(KC_assign_file, 'wb') as out_handle:
    pickle.dump(KC_assignments, out_handle)
with open(KM_assign_file, 'wb') as out_handle:
    pickle.dump(KM_assignments, out_handle)
Пример #9
0
def test_kcenters_5():
    # With the same seed and the same data, the euclidean and sqeuclidean
    # metrics are expected to produce identical cluster assignments.
    shared_kwargs = dict(n_clusters=10, random_state=0)
    euclidean_model = KCenters(metric='euclidean', **shared_kwargs)
    sqeuclidean_model = KCenters(metric='sqeuclidean', **shared_kwargs)

    data = np.random.RandomState(0).randn(100, 2)
    euclidean_labels = euclidean_model.fit_predict([data])[0]
    sqeuclidean_labels = sqeuclidean_model.fit_predict([data])[0]
    eq(euclidean_labels, sqeuclidean_labels)
Пример #10
0
# Call form with a single argument prints the same text on Python 2 and 3.
print(inds_N)

# Sequences of coordinates of ligands: averaged positions over `inds_all`
# when `use_COM` is set, otherwise raw positions of `inds_N`.
sequences_all = []
for this_sim in simulations:
    if use_COM:
        this_seq = util.featurize_RawPos(inds_all, this_sim, average=True)
    else:
        this_seq = util.featurize_RawPos(inds_N, this_sim)
    sequences_all.extend(this_seq)

seqfile = '/home/shenglan/TryMSMbuilder/output/sequences' + '_s' + str(
    LOAD_STRIDE) + '.out'
# Use a `with` block so the pickle file is closed right after the dump.
with open(seqfile, 'wb') as out_handle:
    pickle.dump(sequences_all, out_handle)

KC_clustering = KCenters(n_clusters=N_CLUSTER)
KC_assignments = KC_clustering.fit_predict(sequences_all)
KC_centers = KC_clustering.cluster_centers_

# NOTE(review): the "KM" model is also built with KCenters, exactly like
# the "KC" one above. Presumably a different clusterer was intended here;
# confirm with the author before relying on the KM_* outputs.
KM_clustering = KCenters(n_clusters=N_CLUSTER)
KM_assignments = KM_clustering.fit_predict(sequences_all)
KM_centers = KM_clustering.cluster_centers_

# Cluster centers are written as text files named after the cluster count
# and the load stride.
KC_output_file = '/home/shenglan/TryMSMbuilder/output/KC_centers_c' + str(
    N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out'
KM_output_file = '/home/shenglan/TryMSMbuilder/output/KM_centers_c' + str(
    N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out'
np.savetxt(KC_output_file, KC_centers, fmt='%10.4g')
np.savetxt(KM_output_file, KM_centers, fmt='%10.4g')

KC_assign_file = '/home/shenglan/TryMSMbuilder/output/KC_assign_' + str(
    N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out'