Пример #1
0
    def cluster_clique(self, intervals, threshold):
        """Cluster the point data with CLIQUE and store one label per point.

        ``intervals`` and ``threshold`` are forwarded unchanged to ``clique``.
        Clusters are numbered from 1 in the order ``get_clusters()`` returns
        them, and each member index is written through
        ``data_frame.add_result``.

        Returns:
            The name of the result column ("clique clustering").
        """
        self.clustering_name = "clique clustering"

        points = self.data_frame.get_point_only_df().values
        model = clique(points, intervals, threshold)
        model.process()
        found_clusters = model.get_clusters()

        # -2 is handed to add_result_name together with the column type;
        # presumably it is the initial label of every point - TODO confirm.
        self.data_frame.add_result_name(
            self.clustering_name, -2, ColType.CLUSTER_LABEL)

        for label, members in enumerate(found_clusters, start=1):
            for point_index in members:
                self.data_frame.add_result(
                    self.clustering_name, point_index, label)

        labels = self.data_frame.df[self.clustering_name].tolist()
        # Number of distinct values in the label column.
        self.cluster_count = len(set(labels))
        self.clustering_result = labels

        return self.clustering_name
    def visualize(path, levels, threshold, ccore_enabled, **kwargs):
        """Run CLIQUE on the sample read from ``path`` and display its grid.

        ``levels`` and ``threshold`` are passed positionally to ``clique``;
        ``ccore_enabled`` is passed as the ``ccore`` keyword. Extra keyword
        arguments are accepted and ignored.
        """
        points = read_sample(path)

        algorithm = clique(points, levels, threshold, ccore=ccore_enabled)
        algorithm.process()

        clique_visualizer.show_grid(algorithm.get_cells(), points)
Пример #3
0
    def visualize(path, levels, threshold, ccore_enabled, **kwargs):
        """Read a sample, process it with CLIQUE and show the resulting grid.

        Args:
            path: location handed to ``read_sample``.
            levels: second positional argument of ``clique``.
            threshold: third positional argument of ``clique``.
            ccore_enabled: forwarded as ``ccore=``.
            **kwargs: accepted but unused.
        """
        data = read_sample(path)

        runner = clique(data, levels, threshold, ccore=ccore_enabled)
        runner.process()
        grid_cells = runner.get_cells()

        clique_visualizer.show_grid(grid_cells, data)
Пример #4
0
 def fit(self,data):
     """Run CLIQUE on ``data`` and store one integer label per row.

     ``data`` must expose ``.values`` (the array handed to ``clique``).
     The processed instance is kept in ``self.CLIQUE`` and the labels in
     ``self.labels_``: rows belonging to the k-th returned cluster get
     label ``k`` (starting at 0); rows that appear in no cluster get -1.
     """
     data = data.values
     self.CLIQUE = clique(data,self.intervals,self.threshold)
     self.CLIQUE.process()
     preds = self.CLIQUE.get_clusters()

     # Start from -1 rather than np.empty: rows that are in none of the
     # returned clusters would otherwise keep arbitrary uninitialised
     # values as their label.
     self.labels_ = np.full(data.shape[0], -1, dtype=int)
     for id_,pred in enumerate(preds):
         for i in pred:
             self.labels_[i] = id_
    def exception(type, sample_storage, levels, threshold, ccore_enabled):
        """Assert that creating and processing CLIQUE raises ``type``.

        Args:
            type: exception class expected from the ``clique`` construction
                or from ``process()``.
            sample_storage: either the sample itself or, when it is a
                string, a path that is loaded with ``read_sample``.
            levels: second positional argument of ``clique``.
            threshold: third positional argument of ``clique``.
            ccore_enabled: forwarded as ``ccore=``.

        Raises:
            AssertionError: if a different exception, or no exception at
                all, was raised.
        """
        try:
            sample = sample_storage
            if isinstance(sample_storage, str):
                sample = read_sample(sample_storage)

            clique_instance = clique(sample, levels, threshold, ccore=ccore_enabled)
            clique_instance.process()

        except type:
            return

        except Exception as ex:
            # The parameter ``type`` shadows the builtin, so ``type(ex)`` would
            # instantiate the expected exception instead of returning the class
            # of ``ex``; read the class from the instance itself.
            raise AssertionError("Expected: '%s', Actual: '%s'" % (type, ex.__class__.__name__))

        # Wrap in a tuple so a tuple of expected classes formats correctly too.
        raise AssertionError("Expected: '%s', Actual: 'None'" % (type,))
Пример #6
0
    def exception(type, sample_storage, levels, threshold, ccore_enabled):
        """Check that building and running CLIQUE fails with ``type``.

        ``sample_storage`` is used directly as the sample unless it is a
        string, in which case it is loaded through ``read_sample``.
        ``levels`` and ``threshold`` are passed positionally to ``clique``
        and ``ccore_enabled`` as its ``ccore`` keyword.

        Returns ``None`` when ``type`` is raised.

        Raises:
            AssertionError: when another exception is raised, or none is.
        """
        try:
            sample = sample_storage
            if isinstance(sample_storage, str):
                sample = read_sample(sample_storage)

            clique_instance = clique(sample, levels, threshold, ccore=ccore_enabled)
            clique_instance.process()

        except type:
            return

        except Exception as ex:
            # ``type`` here is the expected exception class (it shadows the
            # builtin), so calling it on ``ex`` would build a new exception
            # object and the ``.__name__`` lookup would fail with
            # AttributeError. Use the instance's own class instead.
            raise AssertionError("Expected: '%s', Actual: '%s'" % (type, ex.__class__.__name__))

        # Tuple-wrapped so that a tuple of expected classes is also formatted.
        raise AssertionError("Expected: '%s', Actual: 'None'" % (type,))
Пример #7
0
def analyze_manifold_old(model: VaeWrapper, sess, xs, ys, stage=1):
    """Print and plot diagnostics of the latent codes of a random subsample.

    At most 1000 randomly chosen rows of ``xs``/``ys`` are encoded with
    ``model.encode(xs, stage=stage)``. The function then prints the
    correlation matrix of the latent dimensions, runs CLIQUE (20 intervals,
    threshold 0) on the codes and prints the number of clusters and the
    cluster encoding. Depending on ``model.latent_dim`` it additionally

    * 2: runs HDBSCAN on the codes, prints its results and plots the
      condensed tree;
    * 3: shows a 3-D scatter plot of the codes coloured by ``ys``.

    ``sess`` is accepted but not used. Nothing is returned.
    """
    # TODO (3/17): deprecate

    cnt = 1000
    inds = list(range(len(xs)))
    np.random.shuffle(inds)
    # Cut the shuffled index list first so only ``cnt`` rows are gathered,
    # instead of permuting the whole arrays and slicing afterwards.
    inds = inds[:cnt]
    xs, ys = xs[inds], ys[inds]
    zs = model.encode(xs, stage=stage)
    zst = zs.T
    corr = np.corrcoef(zst)
    print(corr)

    # create CLIQUE algorithm for processing
    intervals = 20  # defines amount of cells in grid in each dimension
    threshold = 0
    clique_instance = clique(zs, intervals, threshold)
    # start clustering process and obtain results
    clique_instance.process()
    clusters = clique_instance.get_clusters()  # allocated clusters
    print("Amount of clusters:", len(clusters))
    encodings = clique_instance.get_cluster_encoding()
    print(encodings)

    if model.latent_dim == 2:
        # A grid/cluster plot of the CLIQUE result was disabled here; it used
        # clique_visualizer.show_grid(clique_instance.get_cells(), zs) and
        # clique_visualizer.show_clusters(zs, clusters,
        #                                 clique_instance.get_noise()).
        import hdbscan
        clusterer = hdbscan.HDBSCAN(min_cluster_size=4)
        cluster_labels = clusterer.fit_predict(zs)
        # Cluster labels start at 0 and noise is -1, so the number of
        # clusters is the largest label plus one (0 when all is noise).
        print("# clusters by HDBSCAN", cluster_labels.max() + 1)
        print("probabilities", clusterer.probabilities_)
        print("labels", clusterer.labels_)
        clusterer.condensed_tree_.plot()
        plt.show()

    elif model.latent_dim == 3:
        fig = go.Figure(data=[go.Scatter3d(
            x=zst[0], y=zst[1], z=zst[2], mode='markers',
            marker=dict(size=2, color=ys, colorscale='Viridis', opacity=0.8),
        )])
        # tight layout
        fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
        fig.show()
Пример #8
0
    def setup(self, keywords={}):
        """
        Setup the algorithms
        """
        for p in keywords.keys():
            setattr(self, p, keywords[p])

        if self.method == "bang":
            self.obj = bang(self.data_list,
                            self.levels,
                            ccore=self.ccore,
                            density_threshold=self.density_threshold,
                            amount_threshold=self.amount_threshold)
        if self.method == "clique":
            self.obj = clique(self.data_list,
                              self.amount_threshold,
                              self.density_threshold,
                              ccore=self.ccore)
        return
def template_clustering(data_path, intervals, density_threshold, **kwargs):
    """Cluster the sample at ``data_path`` with CLIQUE and visualise it.

    Prints the sample's file name and the size of every cluster, shows the
    CLIQUE grid, then shows the clusters with the noise points drawn using
    the 'x' marker. Extra keyword arguments are accepted and ignored.
    """
    sample_name = os.path.basename(data_path)
    print("Sample: '%s'." % sample_name)

    points = read_sample(data_path)

    algorithm = clique(points, intervals, density_threshold)
    algorithm.process()

    found_clusters = algorithm.get_clusters()
    outliers = algorithm.get_noise()
    grid_cells = algorithm.get_cells()

    cluster_sizes = list(map(len, found_clusters))
    print(cluster_sizes)

    clique_visualizer.show_grid(grid_cells, points)

    figure = cluster_visualizer()
    figure.append_clusters(found_clusters, points)
    figure.append_cluster(outliers, points, marker='x')
    figure.show()
Пример #10
0
    def clustering(path, intervals, density_threshold, expected_clusters, expected_noise, ccore_enabled, **kwargs):
        """Run CLIQUE on the sample at ``path`` and verify its output.

        Checks, in order: the number of cells equals
        ``intervals ** dimension``; clusters plus noise account for every
        point; the noise size equals ``expected_noise``; when
        ``expected_clusters`` is not ``None``, the count and the sorted
        sizes of the clusters match it; and the cells together reference as
        many distinct points as the sample holds.

        Returns:
            The processed ``clique`` instance.
        """
        sample = read_sample(path)
        dimension = len(sample[0])

        clique_instance = clique(sample, intervals, density_threshold, ccore=ccore_enabled)
        clique_instance.process()

        clusters = clique_instance.get_clusters()
        noise = clique_instance.get_noise()
        cells = clique_instance.get_cells()

        assertion.eq(len(cells), intervals ** dimension)

        # Sorted sizes make the comparison independent of cluster order.
        cluster_sizes = sorted(len(members) for members in clusters)
        points_accounted_for = len(noise) + sum(cluster_sizes)

        assertion.eq(len(sample), points_accounted_for)
        assertion.eq(expected_noise, len(noise))

        if expected_clusters is not None:
            assertion.eq(len(expected_clusters), len(clusters))
            assertion.eq(expected_clusters, cluster_sizes)

        covered_points = {index_point for cell in cells for index_point in cell.points}

        assertion.eq(len(sample), len(covered_points))
        return clique_instance
Пример #11
0
    def clustering(path, intervals, density_threshold, expected_clusters, expected_noise, ccore_enabled, **kwargs):
        """Process a sample with CLIQUE and assert the expected structure.

        Args:
            path: sample location given to ``read_sample``.
            intervals: second positional argument of ``clique``; the grid
                is expected to hold ``intervals ** dimension`` cells.
            density_threshold: third positional argument of ``clique``.
            expected_clusters: sorted cluster sizes to compare against, or
                ``None`` to skip that comparison.
            expected_noise: expected number of noise points.
            ccore_enabled: forwarded as ``ccore=``.
            **kwargs: accepted but unused.

        Returns:
            The processed ``clique`` instance.
        """
        sample = read_sample(path)
        dimension = len(sample[0])

        clique_instance = clique(sample, intervals, density_threshold, ccore=ccore_enabled)
        clique_instance.process()

        clusters = clique_instance.get_clusters()
        noise = clique_instance.get_noise()
        cells = clique_instance.get_cells()

        expected_cell_count = pow(intervals, dimension)
        assertion.eq(len(cells), expected_cell_count)

        sizes = [len(cluster) for cluster in clusters]
        sizes.sort()
        total_points = len(noise) + sum(sizes)

        assertion.eq(len(sample), total_points)
        assertion.eq(expected_noise, len(noise))

        if expected_clusters is not None:
            assertion.eq(len(expected_clusters), len(clusters))
            assertion.eq(expected_clusters, sizes)

        # Every point index referenced by at least one cell.
        covered_points = set()
        for cell in cells:
            covered_points.update(cell.points)

        assertion.eq(len(sample), len(covered_points))
        return clique_instance
Пример #12
0
# Load the customer table, give the two awkward columns plain names,
# encode Gender as 0/1 and drop the identifier column.
data = pd.read_csv("Mall_Customers.csv")
data = data.rename(columns={
    'Annual Income (k$)': 'Annual_Income',
    'Spending Score (1-100)': 'Spending_Score',
})
data['Gender'] = data['Gender'].replace({'Male': 0, 'Female': 1})
data = data.drop(columns=["CustomerID"])
data_values = data.values

# CLIQUE parameters: number of grid cells in each dimension and the
# density threshold.
intervals = 5
threshold = 0

clique_instance = clique(data_values, intervals, threshold)
clique_instance.process()

# Results exposed by the processed instance.
clique_cluster = clique_instance.get_clusters()
noise = clique_instance.get_noise()
cells = clique_instance.get_cells()

print("Amount of clusters:", len(clique_cluster))
for cluster in clique_cluster:
    print(cluster)

labelList = [0] * 200
j = 1
for cluster in clique_cluster:
    for x in cluster:
# Turn the 'dd.mm.YYYY' strings of the date column into datetime objects.
sales.date = sales.date.apply(
    lambda x: datetime.datetime.strptime(x, '%d.%m.%Y'))

# Aggregate per (date_block_num, shop_id, item_id): mean of the block
# number, item price and shop id, sum of item_cnt_day.
# The columns are selected with a list: subsetting a GroupBy with a bare
# tuple of names was deprecated and is rejected by pandas 2.x.
monthly_sales = sales.groupby(
    ["date_block_num", "shop_id", "item_id"]
)[["date_block_num", "item_price", "item_cnt_day", "shop_id"]].agg({
    "date_block_num": "mean",
    "item_price": "mean",
    "item_cnt_day": "sum",
    "shop_id": "mean",
})
# CLIQUE is fed a plain array, so convert the aggregated frame.
df = pd.DataFrame(monthly_sales)
df = np.array(df)

from pyclustering.cluster.clique import clique, clique_visualizer
from pyclustering.utils import read_sample
from pyclustering.samples.definitions import FCPS_SAMPLES
# CLIQUE parameters; both are passed positionally to ``clique`` below.
intervals = 10
threshold = 0
# Build the CLIQUE instance over ``df`` and run the clustering.
clique_instance = clique(df, intervals, threshold)
clique_instance.process()
# Collect the results exposed by the processed instance.
clusters = clique_instance.get_clusters()
noise = clique_instance.get_noise()
cells = clique_instance.get_cells()
print("Amount of clusters:", len(clusters))
Пример #14
0
    data_M = np.array(data)
    return data_M

    # file_list = ["five_cluster.txt", "spiral.txt",
    #              "ThreeCircles.txt", "Twomoons.txt"]


# Load the data set; column 0 is kept as the ground truth and columns 1-2
# are the values that get clustered.
data_M = loadDataSet('Twomoons.txt')
TestData = data_M[:, [1, 2]]
g_truth = data_M[:, 0]
# Create the CLIQUE algorithm instance for processing
# Number of grid cells in each dimension
intervals = 15
# Density threshold
threshold = 10
clique_instance = clique(TestData, intervals, threshold)
# The bare string below is not assigned to anything; it appears to record
# per-file parameters with a score and a run time - TODO confirm meaning.
# NOTE(review): it lists threshold = 1 for Twomoons.txt while the code
# above uses threshold = 10 - confirm which is intended.
'''
five_cluster.txt  intervals = 15,threshold = 10  0.812    t=0.039999961853027344
spiral.txt no
ThreeCircles.txt  intervals = 20,threshold = 1  0.990563419372745   t=0.0800008773803711
Twomoons.txt  intervals = 15,threshold = 1    0.9880159786950732   t=0.04000043869018555
'''

# Start the clustering process and obtain the results
# (t1/t2 bracket process() with nanosecond timestamps)
t1 = time.time_ns()
clique_instance.process()
t2 = time.time_ns()
clique_cluster = clique_instance.get_clusters()  # allocated clusters
# Points regarded as outliers (noise)
noise = clique_instance.get_noise()
# Grid cells formed by CLIQUE
Пример #15
0
 def test_high_dimension_data_failure(self):
     """Expect ``process()`` to raise RuntimeError for two 13-dimensional points with 15 intervals."""
     first_point = [0, 1, 2, 1, 3, 4, 5, 1, 2, 3, 3, 1, 3]
     second_point = [0, 1, 0, 1, 3, 8, 5, 5, 3, 3, 3, 0, 0]
     algorithm = clique([first_point, second_point], 15, 0)
     assertion.exception(RuntimeError, algorithm.process)
Пример #16
0
# The partial frames are no longer needed once X exists.
del X1, X2, X3, X4, X5

# Build the reference labels: n entries of each cluster id
# 0..n_cluster-1, followed by n_noise entries of -1.
col = []
for i in range(n_cluster):
    for j in range(n):
        col.append(i)
for i in range(n_noise):
    col.append(-1)
# NOTE(review): the parentheses make this a no-op; col stays a plain list.
col = (col)
del i, j

# Run CLIQUE once on the data as a list of rows; t0/t1 bracket the
# construction, processing and result retrieval.
# (``intervall`` is spelled this way where it is defined, outside this part.)
X_cliq = X.values.tolist()
t0 = time.time()
clique_instance = clique(X_cliq, intervall, threshold)
clique_instance.process()
clusters = clique_instance.get_clusters()
noise_c = clique_instance.get_noise()
t1 = time.time()
# The noise points are appended as one extra, final entry of ``clusters``.
clusters.append(noise_c)

## plot the result
# array with all the color per cluster
color = []
help_color = np.array([np.zeros(len(X))] * 2).T
counter = 0
for j in range(len(clusters)):
    for index, i in enumerate(clusters[j]):
        help_color[counter, 0] = i
        help_color[counter, 1] = j