Exemplo n.º 1
0
def kdtree_contribute_Matrix(S, K):
    """Build a symmetric 0/1 adjacency matrix from kd-tree K-NN queries on S.

    A kd-tree (leaf size 4) is built over ``S`` and every ``S[i]`` is used as
    a K-NN query against it. Each index returned for query ``i`` is linked to
    ``i`` in both directions; the diagonal is left at zero.

    Args:
        S: indexable collection of points; ``len(S)`` gives the matrix size.
        K: capacity passed to ``KNNResultSet`` for every query.

    Returns:
        An ``(N, N)`` array from ``np.zeros`` holding 1 where two distinct
        points were linked by at least one query and 0 elsewhere.
    """
    n_points = len(S)
    adjacency = np.zeros((n_points, n_points))
    tree_root = kdtree.kdtree_construction(S, leaf_size=4)

    for row in range(n_points):
        neighbours = KNNResultSet(capacity=K)
        kdtree.kdtree_knn_search(tree_root, S, neighbours, S[row])
        for col in neighbours.knn_output_index():
            if col == row:
                # A point matching itself must not create a self-loop.
                continue
            # Mirror the link so the matrix stays symmetric.
            adjacency[row, col] = 1
            adjacency[col, row] = 1

    return adjacency
Exemplo n.º 2
0
    def fit(self, data):
        """Fit the cluster centers to ``data`` by iterating E- and M-steps.

        Reads ``self.k_`` (number of clusters), ``self.max_iter_`` (iteration
        cap) and ``self.tolerance_`` (convergence threshold). Writes
        ``self.centers_``, sets ``self.fitted`` to True and finally passes the
        centers to ``Point_Show``.

        Args:
            data: samples indexed by row, with ``data.shape[0]`` rows.
                Presumably a 2-D NumPy array -- confirm against callers.

        Raises:
            ValueError: raised by ``random.sample`` when ``self.k_`` is larger
                than the number of rows in ``data``.
        """
        n_points = data.shape[0]

        # Step 1: pick k distinct rows at random as the initial centers.
        seed_rows = random.sample(range(n_points), self.k_)
        self.centers_ = data[seed_rows]
        # Cluster means are fractional; storing them in an integer/bool array
        # would silently truncate every update, so promote such arrays once.
        if self.centers_.dtype.kind in "iub":
            self.centers_ = self.centers_.astype(np.float64)
        old_centers = np.copy(self.centers_)  # snapshot for the convergence test

        # kd-tree config
        leaf_size = 1
        k = 1  # each point is assigned to exactly one (its nearest) center
        for _ in range(self.max_iter_):
            # Step 2, E-step: assign every point to its nearest center using
            # a 1-NN search in a kd-tree built over the current centers.
            clusters = [[] for _ in range(self.k_)]
            root = kdtree.kdtree_construction(self.centers_,
                                              leaf_size=leaf_size)
            for i in range(n_points):
                result_set = KNNResultSet(capacity=k)
                query = data[i]
                kdtree.kdtree_knn_search(root, self.centers_, result_set,
                                         query)
                nearest = result_set.knn_output_index()[0]  # index of the nearest center
                clusters[nearest].append(query)

            # Step 3, M-step: move each center to the mean of its members.
            for i, members in enumerate(clusters):
                if not members:
                    # The mean of an empty cluster is NaN and would poison the
                    # center (and the next kd-tree); keep the previous center.
                    continue
                self.centers_[i] = np.array(members).mean(axis=0)

            # Stop once the total absolute movement of all centers falls
            # below tolerance_ * k_.
            shift = np.sum(np.abs(self.centers_ - old_centers))
            if shift < self.tolerance_ * self.k_:
                break
            old_centers = np.copy(self.centers_)  # remember centers for the next round
        self.fitted = True
        Point_Show(self.centers_)