示例#1
0
def test_DBSCAN():
    """Run DBSCAN on a small hand-built 3-D dataset containing one outlier."""
    points = [
        [1, 1.1, 1],
        [1.2, .8, 1.1],
        [.8, 1, 1.2],
        [3.7, 3.5, 3.6],
        [3.9, 3.9, 3.5],
        [3.4, 3.5, 3.7],
        [15, 15, 15],  # far from both groups
    ]
    algo = DBSCAN(eps=0.5, min_points=2)
    algo.run(np.array(points), "Synthetic Data")
示例#2
0
def test_duplicate_dual_dbscan():
    """Two stacks of seven identical points must form exactly two clusters."""
    data = np.array([[1, 1]] * 7 + [[-1, -1]] * 7)

    impl = DBSCAN(0.1, 5)
    impl.Fit(data)

    expected = np.array([0] * 7 + [1] * 7)
    actual = impl.GetLabels(data.shape[0])

    assert t.check_clusters(expected, actual, 2) == True
示例#3
0
  def _run_model(self):
    """Read the dialog fields, fit a DBSCAN model on self.data, close the dialog."""
    # Parse the user-supplied hyperparameters straight from the line edits.
    self.model = DBSCAN(
        epsilon=float(self.epsilonLineEdit.text()),
        minPoints=int(self.minPointsLineEdit.text()),
    )
    self.model.fit(self.data)
    self.run = True
    self.accept()
    self.close()
示例#4
0
def test_large_dbscan():
    """Compare this DBSCAN to sklearn's on a 1000-sample single blob.

    Fix: the blob ground-truth labels were bound to ``r_labels`` and then
    immediately overwritten by ``ref.labels_``; discard them explicitly so
    the reference labelling source is unambiguous.
    """
    data, _ = datasets.make_blobs(n_samples=1000, centers=1)

    impl = DBSCAN(0.6, 4)
    ref = cluster.DBSCAN(0.6, 4)
    impl.Fit(data)
    ref.fit(data)

    r_labels = ref.labels_
    i_labels = impl.GetLabels(data.shape[0])

    assert t.check_clusters(r_labels, i_labels, 1) == True
示例#5
0
def test_simple_single_dbscan():
    """A lone point with min_points=1 forms its own single cluster."""
    data = np.array([[1, 1]])

    impl = DBSCAN(1, 1)
    impl.Fit(data)

    labels = impl.GetLabels(data.shape[0])
    assert t.check_clusters(np.array([0]), labels, 1) == True
示例#6
0
def test_dimensionality_dbscan():
    """This DBSCAN should agree with sklearn's on 16-dimensional blobs."""
    data, _ = datasets.make_blobs(n_samples=288,
                                  n_features=16,
                                  cluster_std=0.2,
                                  random_state=31)

    impl = DBSCAN(0.6, 4)
    ref = cluster.DBSCAN(0.6, 4)
    impl.Fit(data)
    ref.fit(data)

    assert t.check_clusters(ref.labels_,
                            impl.GetLabels(data.shape[0]), 3) == True
示例#7
0
    def test_fit_with_small_eps(self):
        """With eps smaller than any pairwise gap, every point is noise."""
        data = self.get_two_clusters()

        dbscan = DBSCAN(eps=0.1, min_samples=3)
        dbscan.fit(data)

        # No core samples, no components, all seven labels are -1 (noise).
        np.testing.assert_equal(np.array([]), dbscan.core_sample_indices_)
        np.testing.assert_equal(np.array([]), dbscan.components_)
        np.testing.assert_equal(np.array([-1] * 7), dbscan.labels_)
示例#8
0
def test_clear_blobs_dbscan():
    """Two tight, well-separated blobs must produce two matching clusters."""
    data, _ = datasets.make_blobs(n_samples=100,
                                  centers=((-5, -5), (5, 5)),
                                  cluster_std=0.1)

    impl = DBSCAN(0.6, 4)
    ref = cluster.DBSCAN(0.6, 4)
    impl.Fit(data)
    ref.fit(data)

    assert t.check_clusters(ref.labels_,
                            impl.GetLabels(data.shape[0]), 2) == True
示例#9
0
def test_epsilon_dbscan(dbscanparams):
    """Parametrised check: cluster counts match sklearn, labels agree to 5%."""
    eps, min_pts = dbscanparams[0], dbscanparams[1]
    data, _ = datasets.make_blobs(n_samples=512, n_features=2, random_state=73)

    impl = DBSCAN(eps, min_pts)
    ref = cluster.DBSCAN(eps, min_pts)
    impl.Fit(data)
    ref.fit(data)

    i_labels = impl.GetLabels(data.shape[0])
    r_labels = ref.labels_

    n_found = np.unique(i_labels).shape[0]
    assert n_found == np.unique(r_labels).shape[0]
    assert t.check_clusters_with_allowance(r_labels, i_labels,
                                           n_found, 0.05) == True
示例#10
0
class DbscanDialog(QDialog):
  """Modal dialog that collects DBSCAN hyperparameters and fits the model."""

  def __init__(self, data):
    super().__init__()

    self.setWindowTitle("DBSCAN özellikleri")
    self.data = data
    self.model = None
    self.run = False  # flips to True once a model has been fitted

    self._configure()

  def _configure(self):
    """Build the layout: two labelled input rows plus a run button."""
    layout = QVBoxLayout()

    epsilonRow = QHBoxLayout()
    epsilonLabel = QLabel("Epsilon: ")
    self.epsilonLineEdit = QLineEdit("0.3")
    epsilonRow.addWidget(epsilonLabel)
    epsilonRow.addWidget(self.epsilonLineEdit)

    minPointsRow = QHBoxLayout()
    minPointsLabel = QLabel("Min Points: ")
    self.minPointsLineEdit = QLineEdit("10")
    minPointsRow.addWidget(minPointsLabel)
    minPointsRow.addWidget(self.minPointsLineEdit)

    trainButton = QPushButton("Modeli çalıştır")
    trainButton.clicked.connect(self._run_model)

    layout.addLayout(epsilonRow)
    layout.addLayout(minPointsRow)
    layout.addWidget(trainButton)
    self.setLayout(layout)

  def _run_model(self):
    """Read the fields, fit DBSCAN on self.data, then close the dialog."""
    self.model = DBSCAN(
        epsilon=float(self.epsilonLineEdit.text()),
        minPoints=int(self.minPointsLineEdit.text()),
    )
    self.model.fit(self.data)
    self.run = True
    self.accept()
    self.close()
示例#11
0
文件: test.py 项目: batuwa/dbscan
    def test_dbscan(self):
        """DBSCAN on a tiny 2-D set: two triads of close points plus one outlier."""
        points = [[1, 1.1], [1.2, 0.8], [0.8, 1],
                  [3.7, 4], [3.9, 3.9], [3.6, 4.1],
                  [10, 10]]

        result = DBSCAN(eps=0.5, min_pts=2).fit(np.array(points))
        # Clusters are numbered from 1; -1 marks noise.
        self.assertEqual(result, [1, 1, 1, 2, 2, 2, -1])
示例#12
0
def main():
    """Cluster every dataset with DBSCAN, plotting its k-dist curve first."""
    all_datasets = get_datasets()

    min_points = 5
    eps_values = [20, 17, 11, 4]  # one eps per dataset, chosen via k-dist

    for idx, dataset in enumerate(all_datasets):
        # The k-dist plot justifies the eps chosen for this dataset.
        plot_data(get_kdist_data(dataset, min_points))

        clusterer = DBSCAN(min_points, eps_values[idx])
        labels = clusterer.fit(dataset)

        print_labels(labels)
        plot_labeled_data(dataset, labels)
示例#13
0
def algorithm_router(choice, data):
    """Dispatch to K-means when choice == '1', otherwise run DBSCAN."""
    if choice != '1':  # dbscan
        model = DBSCAN(data=data, epsilon=0.9, min_pts=6)
        model.dbscan_main()
        model.show_res()
        return
    # kmeans
    model = KMeans(data=data, k=3, iteration=500)
    model.kmeans_main()
    print(model.cluster_avg)
    model.show_res()
示例#14
0
def test_double_fit_dbscan():
    """Refitting the same estimator on identical data reproduces the labels."""
    blob_kwargs = dict(n_samples=288, centers=6,
                       cluster_std=0.2, random_state=31)
    data1, _ = datasets.make_blobs(**blob_kwargs)
    data2, _ = datasets.make_blobs(**blob_kwargs)

    impl = DBSCAN(0.6, 4)

    impl.Fit(data1)
    first = impl.GetLabels(data1.shape[0])

    impl.Fit(data2)
    second = impl.GetLabels(data2.shape[0])

    assert t.check_clusters_with_allowance(second, first, 6, .01) == True
示例#15
0
文件: test.py 项目: byrmuslu/MLGUI
def dbscan_visualization_test(data, eps=0.3, minPoints=10):
    """Fit DBSCAN on *data* and hand the fitted model to the plot helper."""
    model = DBSCAN(eps, minPoints)
    model.fit(data)
    plot_automation(model)
示例#16
0
import pyspark as ps
from dbscan import DBSCAN
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
from time import time
import numpy as np
import sys

if __name__ == '__main__':
    # Benchmark one configuration, selected by index from the command line.
    i = int(sys.argv[1])
    centers = [[1, 1], [-1, -1], [1, -1]]
    samples = [750, 7500, 75000, 750000, 7500000]
    eps = [0.3, 0.1, 0.03, 0.01, 0.003]
    n_part = [16, 128, 1024, 8192, 65536]

    sc = ps.SparkContext()
    X, labels_true = make_blobs(n_samples=samples[i],
                                centers=centers,
                                cluster_std=0.4,
                                random_state=0)
    X = StandardScaler().fit_transform(X)

    test_data = sc.parallelize(enumerate(X))
    start = time()
    dbscan = DBSCAN(eps[i], 10, max_partitions=n_part[i])
    dbscan.train(test_data)
    result = np.array(dbscan.assignments())
    run_time = time() - start

    # Bug fix: mode 'w' truncated benchmark.csv on every run, so only the last
    # configuration survived. The leading '\n' in the row format shows the rows
    # were meant to accumulate -- append instead.
    with open('benchmark.csv', 'a') as f:
        f.write('\n%i,%f,%i,%i' % (samples[i], eps[i], n_part[i], run_time))
示例#17
0
# -*- coding: utf-8 -*-
"""
Project Code: DBSCAN v1.1
@author: Deep.I Inc. @Jongwon Kim
Revision date: 2020-12-09
Contact Info: :
    https://deep-eye.tistory.com
    https://deep-i.net
"""

from dbscan import DBSCAN
from scipy import io

#%% Run DEMO
# Load the demo point matrix stored under key 'X' in the bundled .mat file.
x = io.loadmat('./sample/sample.mat')['X']
# INIT DBSCAN -- this implementation takes the data and the two parameters
# (presumably eps=1.5 and min_pts=4; verify against the dbscan module) at
# construction time rather than at fit time.
dbscan = DBSCAN(x, 1.5, 4)
# CLUSTERING -- run() returns two values bound to idx and noise; from the
# names these look like per-point assignments and the noise set (confirm).
idx, noise = dbscan.run()
# SORTING -- sort() returns g_cluster and n_cluster; names suggest grouped
# clusters and a cluster count, respectively (names only; verify).
g_cluster, n_cluster = dbscan.sort()
# Visualization
dbscan.plot()
示例#18
0
from data_loader import load_file
from k_means import Kmeans
from dbscan import DBSCAN
from random import shuffle
from utils import calculate_accuracy
from sklearn.cluster import KMeans
from utils import euclidean_distance
import pry

# Build a per-class train/test split of the iris data: after shuffling each
# class, its first 10 rows become test data and the rest become training data.
raw_data = load_file('iris.data')
classes = set([x[-1] for x in raw_data])
class_dict = {}
test_data = {}
train_data = []
for kelas in classes:
    class_dict[kelas] = list(filter(lambda x: x[-1] == kelas, raw_data))
    shuffle(class_dict[kelas])

    # x[:-1] strips the trailing class label, keeping only feature columns.
    test_data[kelas] = [x[:-1] for x in class_dict[kelas][:10]]
    train_data += [x[:-1] for x in class_dict[kelas][10:]]

db_scan = DBSCAN(1, 0.5)
# pry() drops into an interactive debugger before the fit below runs.
pry()
db_scan.fit(train_data[:10])
# Bare expression: only useful for inspection inside the debugger/REPL.
db_scan.clusters
示例#19
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics.cluster import normalized_mutual_info_score

from utils import *
from datasets import *
from classifiers import *
from metrics import *

from agglomerative_clustering import AgglomerativeClustering
from dbscan import DBSCAN

X, y = read_dataset(dataset='Iris')

# Score both clusterers on Iris against the true labels, using adjusted Rand
# index and normalized mutual information.
print("--- AgglomerativeClustering ---")
model = AgglomerativeClustering(n_clusters=3,
                                verbose=False,
                                linkage='complete',
                                distance_metric='l1')
cluster_pred = model.fit_predict(X)
print("adjusted_rand_score", metrics.adjusted_rand_score(y, cluster_pred))
print(" normalized_mutual_info_score",
      normalized_mutual_info_score(y, cluster_pred))

print("--- DBSCAN ---")
# This local DBSCAN follows the sklearn fit_predict convention.
cluster_pred = DBSCAN(eps=1, MinPts=5).fit_predict(X)
print("adjusted_rand_score", metrics.adjusted_rand_score(y, cluster_pred))
print(" normalized_mutual_info_score",
      normalized_mutual_info_score(y, cluster_pred))
示例#20
0
                  [3.9, 3.9, 3.5], [3.4, 3.5, 3.7], [15, 15, 15]])
    eps = 0.5
    min_points = 2
    dbscanalgo = DBSCAN(eps=eps, min_points=min_points)
    dbscanalgo.run(X, "Synthetic Data")


def test_HAC():
    """Run hierarchical agglomerative clustering at levels 2-4 on toy data."""
    test = [[1, 1.1, 1], [1.2, .8, 1.1], [.8, 1, 1.2], [3.7, 3.5, 3.6],
            [3.9, 3.9, 3.5], [3.4, 3.5, 3.7], [15, 15, 15]]
    hac = HAC()
    # Fix: range() replaces the Python-2-only xrange(); iteration over 1..3
    # is identical on both interpreters.
    for i in range(1, 4):
        hac.clusterLevel = i + 1
        hac.run(test, "Synthetic Data with Cluster Level " + str(i))


dbscan = DBSCAN()
hac = HAC()
experiment = Experiments()

# Run both clusterers on the built-in synthetic dataset.
experiment.runSynthetic(dbscan)
experiment.runSynthetic(hac)

# Parameters for the (currently disabled) full experiment call below.
ind = 500
dim = 3
#experiment.run(dbscan, True, ind, dim)

# test_HAC()
# experiment.runSynthetic(hac)
# experiment.run(hac, True, ind, dim)
示例#21
0
    #  #  heatmap_point = []



if __name__ == '__main__':
    # Example of pypadis.DBSCAN
    from sklearn.datasets.samples_generator import make_blobs
    from sklearn.preprocessing import StandardScaler
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches
    import matplotlib.cm as cm
    from time import time
    from itertools import izip
    import os

    X = []
    with open("staypoints.csv","rb") as f:
        reader = csv.reader(f)
        for line in reader:
            X.append(line)
        X = np.array(X, np.float)

    sc = ps.SparkContext()
    test_data = sc.parallelize(enumerate(X))
    start = time()
    dbscan = DBSCAN(eps=0.02, min_samples=20, metric='precomputed')
    dbscan.train(test_data)
    result = np.array(dbscan.assignments())
    print 'clusters count: %s' % len(set(result[: 1]))
    import pdb; pdb.set_trace()
示例#22
0
文件: main.py 项目: gbroques/dbscan
def main():
    """Generate four synthetic clusters and visualise DBSCAN's labelling."""
    num_clusters = 4
    data = generate_data(num_clusters, seed=1)

    model = DBSCAN(eps=7, min_samples=5)
    model.fit(data)

    plot_clusters(data, model.labels_, model.components_)
示例#23
0
import numpy as np
import matplotlib.pyplot as plt

from dbscan import DBSCAN

if __name__ == '__main__':
    epsilon = 0.5
    min_pts = 2
    # Three concentric circles of 100 points each, radii 1, 2 and 3.
    points = np.array(
        [(np.cos(x), np.sin(x)) for x in np.linspace(0, 2 * np.pi, 100)] +
        [(2 * np.cos(x), 2 * np.sin(x)) for x in np.linspace(0, 2 * np.pi, 100)] +
        [(3 * np.cos(x), 3 * np.sin(x))
         for x in np.linspace(0, 2 * np.pi, 100)]
    )

    def euclidean_distance_2d(x, y):
        """Plain Euclidean distance between two 2-D points."""
        return np.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2)

    dbscan = DBSCAN(euclidean_distance_2d, epsilon, min_pts)
    clusters = dbscan.cluster(points)

    # Fix: the original loop variable shadowed the `points` data array; the
    # rename changes no behaviour but removes the confusing reuse.
    for member_points in clusters.values():
        pt_cluster = np.array(member_points)
        plt.scatter(pt_cluster[:, 0], pt_cluster[:, 1])
    plt.show()
示例#24
0
 def fit(self):
     """Fit a fresh DBSCAN (self.eps, self.min_samples) on the two-cluster fixture."""
     data = self.get_two_clusters()
     model = DBSCAN(eps=self.eps, min_samples=self.min_samples)
     model.fit(data)
     return model
示例#25
0
from dbscan import DBSCAN
from sklearn.datasets import make_moons

# Two interleaving half-moons with mild Gaussian noise.
data, _ = make_moons(n_samples=300, noise=0.1)

eps_radius = 0.2
neighbourhood_size = 10

print('Radius = ' + str(eps_radius) + ', Minpoints = ' + str(neighbourhood_size))

# This DBSCAN takes the data at construction time; fit() returns the
# per-point labels together with the number of groups found.
clusterer = DBSCAN(data, eps_radius, neighbourhood_size)

#Fitting model to dataset
labels, n_groups = clusterer.fit()

print('Number of clusters: ' + str(n_groups - 1))

#Plotting result
clusterer.plot_result(data, labels, n_groups)
示例#26
0
cnoise = 0.1  # standard deviation of Gaussian noise added to the data
cfactor = 0.3  # scale factor between inner and outer circles

# Setting for moons
mnoise = 0.1  # standard deviation of Gaussian noise added to the data

# Blobs
# Generate points -- "blobs"
# NOTE(review): n_samples, bcenters, bn_features, bcluster_std and
# random_state are defined earlier in the file, outside this excerpt.
bX, _ = generate_dataset("blobs",
                         n_samples=n_samples,
                         centers=bcenters,
                         n_features=bn_features,
                         cluster_std=bcluster_std,
                         random_state=random_state)
# Cluster points by DBSCAN
bdbs = DBSCAN(epsilon=1.0, min_samples=5)
bdbs.fit_predict(bX)
# Plot clustering results
plot_clusters(bdbs)

# Circles
# Generate points -- "circles"
cX, _ = generate_dataset("circles",
                         n_samples=n_samples,
                         noise=cnoise,
                         factor=cfactor,
                         random_state=random_state)
# Cluster points by DBSCAN
cdbs = DBSCAN(epsilon=0.2, min_samples=5)
cdbs.fit_predict(cX)
# Plot clustering results
示例#27
0

if __name__ == '__main__':
    # Draw two 2-D Gaussian clouds (20 points each) with different means
    # but the same covariance.
    X1 = create_artificial_gaussiandata(np.array([1, 2]),
                                        np.array([[2, 1], [1, 2]]), 20)
    X2 = create_artificial_gaussiandata(np.array([10, 8]),
                                        np.array([[2, 1], [1, 2]]), 20)
    X = np.concatenate([X1, X2], 0)  # join the two ndarrays
    # Visualise the raw data, one colour/series per cloud
    plotter = PlotUtility()
    plotter.scatter_plot(X1[:, 0],
                         X1[:, 1], [1 for _ in range(len(X1))],
                         size=5)
    plotter.scatter_plot(X2[:, 0],
                         X2[:, 1], [2 for _ in range(len(X2))],
                         size=5)
    plotter.show()
    # Clustering: this DBSCAN variant is fitted on a precomputed
    # distance matrix rather than on raw coordinates.
    dbscan = DBSCAN(2, 3)
    dist_matrix = make_distance_matrix(X)
    cluster = dbscan.fit(dist_matrix)
    print(cluster)
    # Visualise each resulting cluster as its own scatter series
    plotter = PlotUtility()
    for i in range(int(min(cluster)), int(max(cluster)) + 1):
        c = devide(X, cluster, i)
        plotter.scatter_plot(c[:, 0],
                             c[:, 1], [i for _ in range(len(c))],
                             size=5)
    plotter.show()
示例#28
0
 def test_raises_error_for_invalid_eps(self):
     """Constructing DBSCAN with eps=0 must raise ValueError."""
     self.assertRaises(ValueError, DBSCAN, eps=0)
示例#29
0
        # NOTE(review): this excerpt begins mid elif-chain; the `if` head,
        # the enclosing loops and the accumulators (agglo_accuracy_*, k, epss,
        # min_ptss, X_train, X_test, y_test) are defined above this chunk.
        elif linkage_type == 'average' :
            agglo_accuracy_average += accuracy
        elif linkage_type == 'average-group' :
            agglo_accuracy_average_group += accuracy
        print ('Agglomerative - ' + str(linkage_type))
        print ('Accuracy\t', accuracy)
        print ('Format {Real class : cluster}')
        print ('Dict\t\t', str(dict))
        print ()

    # DBSCAN
    # Evaluate one DBSCAN per (eps, min_pts) pair taken pairwise from
    # epss and min_ptss.
    for i in range (0, len(epss)) :
        eps = epss[i]
        min_pts = min_ptss[i]

        dbscan = DBSCAN(eps, min_pts)
        # sk_dbscan is created but not used in this excerpt -- presumably
        # consumed further down the file; confirm before removing.
        sk_dbscan = sklearn_DBSCAN(eps=eps, min_samples=min_pts)

        dbscan.fit(X_train)
        result = dbscan.predict(X_test)
        # NOTE(review): `dict` shadows the builtin of the same name here.
        accuracy, dict = clustering_accuracy_score(np.asarray(y_test), np.asarray(result))
        dbscan_accuracy += accuracy
        print ('DBSCAN')
        print ('Epsilon : {} Min Points : {}'.format(eps, min_pts))
        print ('Accuracy\t', accuracy)
        print ('Format {Real class : cluster}')
        print ('Dict\t\t', str(dict))
        print ()

    # Advance the outer (fold?) counter -- its loop header is above this chunk.
    k += 1
示例#30
0
 def test_raises_error_for_invalid_min_samples(self):
     """Constructing DBSCAN with min_samples=0 must raise ValueError."""
     self.assertRaises(ValueError, DBSCAN, min_samples=0)