示例#1
0
def rand(l1, l2):
    """Return the Adjusted Rand Index of two clusterings.

    Thin wrapper: both label sequences are forwarded, unchanged and in the
    same order, to ``ars``.

    Parameters
    ----------
    l1, l2 : array
        Cluster assignments.

    Returns
    -------
    The value produced by ``ars(l1, l2)``.
    """
    score = ars(l1, l2)
    return score
def matchClusteringLabels(filename):
    """Compare a clustering result with the Kaiju labels of one sample.

    Reads the cluster assignments and the Kaiju labels for ``filename``
    (both tab-separated, ``id<TAB>integer``), prints the absolute value of
    ``ars`` computed over the reads listed in the label file, and writes a
    three-column comparison file (id, cluster id, label) ordered by
    cluster id.

    Parameters
    ----------
    filename : str
        Sample name spliced into the hard-coded input and output paths.

    Raises
    ------
    KeyError
        If an id from the label file has no entry in the clustering file.
    """
    # id -> cluster id. The first character of every id is dropped before it
    # is used as a key (presumably a FASTA/FASTQ marker such as '>' or '@'
    # that the label file does not carry -- confirm against the data).
    result1 = {}
    with open('/home/haohanw/metagenomics/clusteringResult_' +
              filename + '_filterMouse.txt') as cluster_file:
        for line in cluster_file:
            items = line.strip().split('\t')
            result1[items[0][1:]] = int(items[1])

    # rs1 / rs2 are parallel lists (label, cluster id) in label-file order;
    # result2 maps id -> label for the comparison file written below.
    rs1 = []
    rs2 = []
    result2 = {}
    with open('/home/haohanw/metagenomics/Kaiju/' +
              filename + '.label') as label_file:
        for line in label_file:
            items = line.strip().split('\t')
            rs1.append(int(items[1]))
            rs2.append(result1[items[0]])
            result2[items[0]] = int(items[1])

    # Single-argument print calls produce the same text on Python 2 and 3.
    print('organizing results, now calculating')

    a = ars(rs1, rs2)
    print('final score %s' % abs(a))

    sorted_result1 = sorted(result1.items(), key=operator.itemgetter(1))
    # The context manager closes the output file even if a write fails.
    with open(
            '/home/haohanw/metagenomics/clusteringCompare_' + filename +
            '.txt', 'w') as f:
        for (n, v) in sorted_result1:
            if n in result2:
                f.write(n + '\t' + str(v) + '\t' + str(result2[n]) + '\n')
示例#3
0
 def train(self):
     """Run optimisation steps until the coordinator asks to stop.

     Each iteration runs ``self.loss``, ``self.opt`` and ``self.norm`` in
     ``self.sess`` and adds the loss to a running total. Every 500 steps
     the cluster assignments are evaluated with ``self.raw_images`` fed
     into ``self.test_images`` and one tab-separated line is printed:
     step, ``ars``, ``mis``, ``v_score``, purity, norm, and the mean loss
     so far.

     A ``tf.errors.OutOfRangeError`` raised inside the loop ends training
     with a message. The coordinator is always asked to stop, and the
     threads in ``self.threads`` are joined before returning.
     """
     step = 0
     # Named ``total_loss`` rather than ``sum`` to avoid shadowing the builtin.
     total_loss = 0
     try:
         while not self.coord.should_stop():
             step += 1
             loss, _, norm = self.sess.run([self.loss, self.opt, self.norm])
             total_loss += loss
             if step % 500 == 0:
                 assignments, = self.sess.run(
                     [self.assignments],
                     feed_dict={self.test_images: self.raw_images})
                 print('%s\t%0.4f\t%0.4f\t%0.4f\t%0.4f\t%0.4f\t%0.4f' %
                       (step, ars(self.labels, assignments),
                        mis(self.labels, assignments),
                        v_score(self.labels, assignments),
                        self.purity_score(self.labels, assignments),
                        norm, total_loss / step))
     except tf.errors.OutOfRangeError:
         # Function-call form matches the progress print above and parses on
         # both Python 2 and Python 3.
         print('Done training')
     finally:
         self.coord.request_stop()
     self.coord.join(self.threads)
示例#4
0
# Call the private ``_fit_single`` helper directly, with no fixed random state.
model_2._fit_single(X, random_state=None)

# In[5]:

model_2.fit(X)

# In[8]:

# Bare expression: only has a visible effect as notebook cell output.
model_2.row_labels_

# In[9]:

# Score the row labels of model_2 against the reference labels; the metrics
# are evaluated in the same order in which they are printed.
predicted_labels_2 = model_2.row_labels_
nmi_2 = nmi(true_labels, predicted_labels_2)
acc_2 = acc(true_labels, predicted_labels_2)
ars_2 = ars(true_labels, predicted_labels_2)
amis_2 = amis(true_labels, predicted_labels_2)
print(nmi_2, acc_2, ars_2, amis_2)

# In[11]:

model_5 = NMTFcoclus_ONM3F.ONM3F(n_row_clusters=4, n_col_clusters=4)
model_5.fit(X)

# In[15]:

# Same four metrics for the ONM3F model.
predicted_labels_5 = model_5.row_labels_
nmi_5 = nmi(true_labels, predicted_labels_5)
acc_5 = acc(true_labels, predicted_labels_5)
ars_5 = ars(true_labels, predicted_labels_5)
amis_5 = amis(true_labels, predicted_labels_5)
print(nmi_5, acc_5, ars_5, amis_5)
示例#5
0
# Append the noise columns to the feature matrix, then normalise it.
X = np.append(X, noise, axis=1)
X = normalize(X)

# Alternative embeddings tried instead of GSOM:
# Y = SelfOrganizingSwarm(iterations=250, alpha=1, beta=0.9, delta=0.001, theta=3).fit_transform(X)
# Y = PCA(2).fit_transform(X)
# Y = TSNE().fit_transform(X)
# Y = Isomap().fit_transform(X)
# Earlier GSOM settings: lr=1.0, beta=0.0, sf=0.01, fd=0.75, wd=0.5
Y = GSOM().fit_transform(X, lr=1.0, beta=0.5, sf=0.6, wd=0.175, fd=0.8)

# Top panel: first two embedding coordinates, coloured by `color`.
plt.subplot(211)
plt.scatter(Y.T[0], Y.T[1], s=15, c=plt.cm.jet(color / (n_clusters * 1.0)),
            edgecolors='none', alpha=0.375)

labs = KMeans(n_clusters).fit(Y).labels_

# Bottom panel: same points, coloured by the k-means labels.
plt.subplot(212)
plt.scatter(Y.T[0], Y.T[1], s=15, c=plt.cm.jet(labs / (n_clusters * 1.0)),
            edgecolors='none', alpha=0.375)

# Single-argument print calls give the same output on Python 2 and 3; the
# double space reproduces the original "'ars ', value" formatting.
print('ars  %s' % ars(color, labs))
print('ami  %s' % ami(color, labs))

plt.show()
示例#6
0
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score as ars

# Load the training split first, then the test split; neither file has a
# header row, so columns are labelled 0..N by position.
digits_train, digits_test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('./data/optdigits.tra', './data/optdigits.tes')
)

# Columns 0-63 hold the features, column 64 holds the target.
X_train, y_train = digits_train[np.arange(64)], digits_train[64]
X_test, y_test = digits_test[np.arange(64)], digits_test[64]

# Cluster the training features into 10 groups, then assign each test row
# to one of the fitted clusters.
kmeans = KMeans(n_clusters=10)
kmeans.fit(X_train)
y_pred = kmeans.predict(X_test)

# Agreement between the test targets and the predicted clusters.
agreement = ars(y_test, y_pred)
print(agreement)