    plt.scatter(x16, y16, c='purple')
    plt.scatter(x17, y17, c='navy')
    plt.scatter(x18, y18, c='violet')
    plt.scatter(x19, y19, c='purple')
    plt.scatter(x20, y20, c='coral')

    plt.show()

    #result2 is the predicted class of every sample
    #label is the actual class of every sample
    label = []
    result2 = []
    for i in range(len(result[1])):
        label.append(int(result[1][i].category))
    for j in range(len(clf.labels_)):
        result2.append(clf.labels_[j])

    import evaluate as A
    purity = A.purity(result2, label)
    NMI = A.NMI(result2, label)
    TP, TN, FP, FN = A.contingency_table(result2, label)
    rand_index = A.rand_index(result2, label)
    precision = A.precision(result2, label)
    recall = A.recall(result2, label)
    F_measure = A.F_measure(result2, label)

    print("Purity:" + str(purity))
    print("Precision:" + str(precision))
    print("Recall:" + str(recall))
    print("F_measue:" + str(F_measure))
Example #2
                if p.clusterId == i:
                    cluster['cluster'].add(p)
            self.clusters.append(cluster)


if __name__ == '__main__':

    es, ps, fs = [], [], []

    # vary the eps parameter from 1 to 9 while minPts stays fixed at 100
    for i in xrange(1, 10):
        points = readPoints('dataset2.dat', 'dataset2-label.dat')
        dbscan = DBscanClustering(points, i, 100)
        dbscan.clustering()

        es.append(i)
        ps.append(purity(dbscan.clusters, dbscan.points))
        fs.append(fscore(dbscan.clusters, dbscan.points))

    print '\t'.join([str(i) for i in es])
    print '\t'.join([str(i) for i in ps])
    print '\t'.join([str(i) for i in fs])

    print "\n\n"

    ms, ps, fs = [], [], []

    # vary minPts from 10 to 190 in steps of 10 while eps stays fixed at 6
    for i in xrange(10, 200, 10):
        points = readPoints('dataset2.dat', 'dataset2-label.dat')
        dbscan = DBscanClustering(points, 6, i)
        dbscan.clustering()
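The eps sweep above already collects the eps values, purities and F-scores in es, ps and fs before printing them as tab-separated rows; a quick way to eyeball the trend is to plot the two metric curves. A minimal sketch, assuming those three lists are in scope after the first loop has run.

# Hypothetical plot of the eps sweep results; assumes es, ps and fs
# were filled by the loop above.
import matplotlib.pyplot as plt

plt.plot(es, ps, marker='o', label='purity')
plt.plot(es, fs, marker='s', label='F-score')
plt.xlabel('eps')
plt.ylabel('score')
plt.legend()
plt.show()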
Example #3
                    d = distance(point, cluster["mean"])
                    ds.append(d)

                minId = ds.index(min(ds))

                if point.clusterId != minId:
                    self.clusters[point.clusterId]["cluster"].remove(point)
                    self.clusters[minId]["cluster"].add(point)
                    point.clusterId = minId
                    stop = False

            self._reCalculateMean()


if __name__ == "__main__":

    ks, ps, fs = [], [], []

    # try cluster counts k from 5 to 25
    for k in xrange(5, 26):
        points = readPoints("dataset1.dat", "dataset1-label.dat")
        kmeans = KmeansClustering(points, k)

        kmeans.clustering()
        ks.append(k)
        ps.append(purity(kmeans.clusters, kmeans.points))
        fs.append(fscore(kmeans.clusters, kmeans.points))

    print "\t".join([str(i) for i in ks])
    print "\t".join([str(i) for i in ps])
    print "\t".join([str(i) for i in fs])