Пример #1
0
def get_eps(X, neigh=2):
    eps_dist = np.sort(calculate_kn_distance(X, neigh=neigh))
    plt.hist(eps_dist, bins=60)
    plt.ylabel('n')
    plt.xlabel('Epsilon distance')
    plt.show()

    rotor = Rotor()
    curve_xy = np.concatenate(
        [np.arange(eps_dist.shape[0]).reshape(-1, 1),
         eps_dist.reshape(-1, 1)], 1)
    rotor.fit_rotate(curve_xy)
    rotor.plot_elbow()
    e_idx = rotor.get_elbow_index()

    return curve_xy[e_idx]
Пример #2
0
#1rst evaluation
# Nearest neighbors to find the optimal epsilon (maximum distance) https://towardsdatascience.com/machine-learning-clustering-dbscan-determine-the-optimal-value-for-epsilon-eps-python-example-3100091cfbc
nbrs = NearestNeighbors(n_neighbors = 5, algorithm = 'kd_tree').fit(xyz_nn) #['auto', 'ball_tree', 'kd_tree', 'brute']
distances, indices = nbrs.kneighbors(xyz_nn) #the indices of the nearest neighbors 
distances = np.sort(distances, axis=0)
distances = distances[:,4]
plt.plot(distances)

y = np.array(distances)
x = np.linspace(0,len(x),len(x))
xy = np.vstack((x,y)).T

rotor = Rotor()
rotor.fit_rotate(xy)
elbow_idx = rotor.get_elbow_index()
rotor.plot_elbow()
eps = distances[elbow_idx]/2
del x,y,xy

clustering = DBSCAN( algorithm = 'kd_tree',eps=eps, min_samples=5).fit(xyz_nn) #the number of samples is D+1=4
labels = clustering.labels_

colors = [int(i % 23) for i in labels] # 554 labels to 23 distinguished colors

v = pptk.viewer(data,colors)
v.set(point_size=0.01)

# matplotlib
core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
core_samples_mask[clustering.core_sample_indices_] = True
labels = clustering.labels_