Example #1
    def test_2d_1cluster(self):
        #create simple data:
        a = range(0, 11)
        print(a)
        d = []
        for i in a:
            for j in a:
                d.append([i*1.0, j*1.0])

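        # an 11 x 11 integer grid: with eps=2.0 and minPts=3 every point has
        # enough neighbors within reach, so a single cluster is expected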
        dbscanner = Dbscan(np.array(d), 3, 2.0)
        dbscanner.run()
        plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
Example #2
 def test_plotting(self):
     #some dummy data
     number, x_coordinate, y_coordinate = loadtxt('testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     #put in the data we want to use
     minNeighbors = 5
     epsilon = 2
     data = array(D)
     # use dbscan
     dbscanner = Dbscan(data, minNeighbors, epsilon)
     dbscanner.run()
     # use plotting
     plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
Example #3
 def test_plotting(self):
     #some dummy data
     number, x_coordinate, y_coordinate = loadtxt('testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     #put in the data we want to use
     minNeighbors = 5
     epsilon = 3.
     data = np.array(D, dtype=np.float64)
     # use dbscan
     dbscanner = Dbscan(data, minNeighbors, epsilon)
     dbscanner.run()
     # use plotting
     plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
Example #4
 def test_dbscan2(self):
     testdata2='testdata/eps0p01-minpts1-cluster0-noise100.dat'
     number, x_coordinate, y_coordinate      = np.loadtxt(testdata2, unpack = True)
     D=[None]*len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii]=[x_coordinate[ii],y_coordinate[ii]]
     D=np.array(D)
     epsilon=0.01
     minNeighbors=1
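     # the data file is pure noise (cluster0-noise100): with this tiny eps no
     # cluster should form and every point should end up in the noise list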
     dbscanner = Dbscan(D, minNeighbors,epsilon)
     test1 = dbscanner.run()
     datennoise=test1[len(test1)-1]
     datennoise=np.array(datennoise)
     b=[]
     cluster1=test1[:len(test1)-1]
     assert cluster1 == b
     np.testing.assert_array_equal(datennoise, D)
Example #5
 def test_dbscan2(self):
     testdata2 = 'testdata/eps0p01-minpts1-cluster0-noise100.dat'
     number, x_coordinate, y_coordinate = np.loadtxt(testdata2, unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     D = np.array(D)
     epsilon = 0.01
     minNeighbors = 1
     dbscanner = Dbscan(D, minNeighbors, epsilon)
     dbscanner.run()
     datennoise = dbscanner.getNoise()
     datennoise = np.array(datennoise)
     b = []
     cluster1 = dbscanner.getClusterList()
     np.testing.assert_array_equal(cluster1, b)
     np.testing.assert_array_equal(datennoise, D)
Example #6
 def test_dbscan1(self):
     testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
     number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     D = np.array(D)
     epsilon = 4.0
     minNeighbors = 3
     dbscanner = Dbscan(D, minNeighbors, epsilon)
     dbscanner.run()
     datennoise = dbscanner.getNoise()
     Dresultall = []
     for daten in dbscanner.getClusterList():
         Dresult = []
         for item in daten:
             Dresult.append(item)
         Dresultall.append(Dresult)  #
     #assert D==a
     Dr = np.array(Dresultall)
     a = np.array([])
     np.testing.assert_array_equal(a, datennoise)
Example #7
 def test_dbscan1(self):
     testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
     number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     D = np.array(D)
     epsilon = 2
     minNeighbors = 3
     dbscanner = Dbscan(D, minNeighbors, epsilon)
     test1 = dbscanner.run()
     datennoise = test1[len(test1) - 1]
     Dresultall = []
     for daten in test1[:len(test1) - 1]:
         Dresult = []
         for item in daten:
             Dresult.append(item)
         Dresultall.append(Dresult)
     Dr = np.array(Dresultall)
     a = []
     # no noise expected; the single cluster should hold every input point
     # (this assumes the implementation keeps the points in input order)
     assert datennoise == a
     np.testing.assert_array_equal(D, Dr[0])
Example #8
    def test_2d_1cluster(self):
        #create simple data:
        a = range(0, 11)
        print(a)
        d = []
        for i in a:
            for j in a:
                d.append([i * 1.0, j * 1.0])

        dbscanner = Dbscan(np.array(d), 3, 2.0)
        dbscanner.run()
        plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
Example #9
    def test_dbscan(self):
        exact_labels = []
        label_1 = "Iris-setosa"
        label_2 = "Iris-versicolor"
        label_3 = "Iris-virginica"

        for item in self.data["label"]:
            if item == label_1:
                exact_labels.append(2)
            elif item == label_2:
                exact_labels.append(3)
            else:
                exact_labels.append(1)

        epsilon = 2
        min_pts = 2
        dbscan = Dbscan(epsilon, min_pts)

        X_train, X_test, y_train, y_test = train_test_split(
            self.features, exact_labels, test_size=0.33, random_state=42)
        dbscan.load_data(X_train.to_numpy().tolist())
        dbscan.train()
        labels = dbscan.predict(X_test.to_numpy().tolist())

        accurate_sum = 0
        for i in range(len(labels)):
            if labels[i] == y_test[i]:
                accurate_sum += 1

        print("Akurasi DBScan: ", accurate_sum / len(labels))

        clustering_labels = DBSCAN(eps=epsilon,
                                   min_samples=min_pts).fit_predict(X_train)
        clustering_labels = [c + 3 for c in clustering_labels]

        sklearn_accurate_sum = 0
        for i in range(len(labels)):
            if clustering_labels[i] == y_test[i]:
                sklearn_accurate_sum += 1

        print("Akurasi DBScan sklearn: ", sklearn_accurate_sum / len(labels))
Example #10
 def test_plotting(self):
     #some dummy data
     number, x_coordinate, y_coordinate = loadtxt(
         'testdata/eps3-minpts5-cluster5-noise20.dat', unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     #put in the data we want to use
     minNeighbors = 5
     epsilon = 3.
     data = np.array(D, dtype=np.float64)
     # use dbscan
     dbscanner = Dbscan(data, minNeighbors, epsilon)
     dbscanner.run()
     # use plotting
     plotting.plotting(dbscanner.getClusterList(), dbscanner.getNoise())
Example #11
 def test_dbscan2(self):
     testdata2 = 'testdata/eps0p01-minpts1-cluster0-noise100.dat'
     number, x_coordinate, y_coordinate = np.loadtxt(testdata2, unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     D = np.array(D)
     epsilon = 0.01
     minNeighbors = 1
     dbscanner = Dbscan(D, minNeighbors, epsilon)
     dbscanner.run()
     datennoise = dbscanner.getNoise()
     datennoise = np.array(datennoise)
     b = []
     cluster1 = dbscanner.getClusterList()
     np.testing.assert_array_equal(cluster1, b)
     np.testing.assert_array_equal(datennoise, D)
Example #12
 def test_dbscan1(self):
     testdata = 'testdata/eps2-minpts3-cluster1-noise0.dat'
     number, x_coordinate, y_coordinate = np.loadtxt(testdata, unpack=True)
     D = [None] * len(x_coordinate)
     for ii in range(len(x_coordinate)):
         D[ii] = [x_coordinate[ii], y_coordinate[ii]]
     D = np.array(D)
     epsilon = 4.0
     minNeighbors = 3
     dbscanner = Dbscan(D, minNeighbors, epsilon)
     dbscanner.run()
     datennoise = dbscanner.getNoise()
     Dresultall = []
     for daten in dbscanner.getClusterList():
         Dresult = []
         for item in daten:
             Dresult.append(item)
         Dresultall.append(Dresult)  #
     #assert D==a
     Dr = np.array(Dresultall)
     a = np.array([])
     np.testing.assert_array_equal(a, datennoise)
Example #13
from sklearn import metrics
from sklearn.datasets import make_moons
from dbscan import Dbscan
import numpy as np
import matplotlib.pyplot as plt


X, labels_true = make_moons(n_samples=200, noise=0.1, random_state=19)
Y = np.array_split(X, 2)

db = Dbscan(eps=0.25, min_samples=12).partial_fit(Y[0]).partial_fit(Y[1])
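# partial_fit is assumed here to be an incremental counterpart of fit: each
# call folds one batch of points into the clustering and returns self, which
# is what lets the two calls be chained; the one-shot fit(X) form is shown in
# the next example.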
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" % metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" % metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

unique_labels = set(labels)
colors = [plt.cm.Spectral(each)
          for each in np.linspace(0, 1, len(unique_labels))]
Example #14
from sklearn import metrics
from sklearn.datasets import make_moons
from dbscan import Dbscan
import numpy as np
import matplotlib.pyplot as plt

X, labels_true = make_moons(n_samples=200, noise=0.1, random_state=19)
db = Dbscan(eps=0.25, min_samples=12).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" %
      metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" %
      metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, labels))

unique_labels = set(labels)
colors = [
    plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))
]
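Both of the last two listings stop right after the color list is built, before anything is drawn. A minimal continuation, assuming the usual matplotlib recipe from the scikit-learn DBSCAN demo (core samples drawn with large markers, border samples with small ones, noise in black) and assuming db.labels_ is a NumPy array so boolean masking works, could look like this:

for k, col in zip(unique_labels, colors):
    if k == -1:
        # noise points are drawn in black
        col = [0, 0, 0, 1]

    class_member_mask = (labels == k)

    # core samples of this cluster
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=14)

    # border (non-core) samples of this cluster
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()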