示例#1
0
def test_kd_tree_KDE(n_samples=100, n_features=3):
    """Yield sub-tests comparing KDTree KDE against a slow reference.

    For every kernel / bandwidth / tolerance / traversal combination, a
    ``check_results`` callable is yielded that asserts the tree-based
    density estimate matches ``compute_kernel_slow`` within the requested
    tolerances.
    """
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    kdt = KDTree(X, leaf_size=10)

    kernels = ('gaussian', 'tophat', 'epanechnikov', 'exponential',
               'linear', 'cosine')
    for kernel in kernels:
        for h in (0.001, 0.01, 0.1):
            dens_true = compute_kernel_slow(Y, X, kernel, h)

            def check_results(kernel, h, atol, rtol, dualtree, breadth_first):
                dens = kdt.kernel_density(
                    Y, h, atol=atol, rtol=rtol, kernel=kernel,
                    dualtree=dualtree, breadth_first=breadth_first)
                assert_allclose(dens, dens_true, atol=atol, rtol=rtol)

            for rtol in (0, 1E-5):
                for atol in (1E-10, 1E-5, 0.1):
                    for dualtree in (True, False):
                        # rtol-based pruning is unsupported in dual-tree mode.
                        if rtol > 0 and dualtree:
                            continue
                        for breadth_first in (True, False):
                            yield (check_results, kernel, h, atol, rtol,
                                   dualtree, breadth_first)
示例#2
0
    def fit(self, points):
        """Cluster the data set (OPTICS-style ordering).

        Clustering is driven by ``_expand_cluste_order``: given a starting
        point ``pt`` it computes ``pt``'s core distance and eps-neighborhood,
        updates the reachability distance of the neighborhood points, and
        then processes ``pt``'s unprocessed neighbors in order of increasing
        reachability distance (same procedure, recursively).

        Sweeping the whole data set and expanding every not-yet-expanded
        point completes the clustering.  Per-point results are stored in
        ``self.results`` and the traversal order in ``self.results_order``;
        together they describe the final clustering.

        :param points: [list] input data; every element is a fixed-length
            1-D numpy array
        :returns: self
        :rtype: same class as ``self``
        """
        self.point_num = len(points)
        self.point_size = points[0].size
        # results[i] = [processed flag, core distance, reachability distance]
        self.results = [[None, np.inf, np.inf] for _ in range(self.point_num)]
        # Order in which the data points are visited.
        self.results_order = []
        # Data is stored in a kd-tree to support lookups
        # (the lookup capability does not actually seem to be used).
        self.kd_tree = KDTree(self.point_size)
        self.kd_tree.create(points)

        for pid in range(self.point_num):
            # Expand every point that has not been processed yet.
            if not self.results[pid][0]:
                self._expand_cluste_order(pid)
        return self
示例#3
0
def test_kd_buildup(points: List[Point]) -> float:
    """Return the peak extra memory (bytes) used while building a KDTree."""
    tracemalloc.start()
    baseline = tracemalloc.get_traced_memory()[0]
    tree = KDTree(points)  # kept alive until measurement is taken
    peak = tracemalloc.get_traced_memory()[1]
    tracemalloc.stop()
    return peak - baseline
示例#4
0
 def fit(self, X, y):
     """Store the training data in a KD-tree for later neighbor queries.

     :param X: training vectors; sparse rows unless ``self.dense`` is set
     :param y: labels aligned element-wise with ``X``
     """
     self.label_set = set(y)
     data = list()
     # ``zip`` is equivalent here on both Python 2 and 3;
     # ``itertools.izip`` no longer exists in Python 3.
     for con, lab in zip(X, y):
         if not self.dense:
             # Densify a sparse row into a flat 1-D array.
             con = con.toarray()
             con = con[0]
         data.append((con, lab))
     # Create a KDTree using the data given and store it
     self.data = KDTree(data, self.k_neighbours, balanced=self.balanced)
示例#5
0
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Assert tree-based k-NN distances agree with brute force."""
        tree = KDTree(X, leaf_size=1, metric=metric, **kwargs)
        tree_dist, tree_ind = tree.query(
            Y, k, dualtree=dualtree, breadth_first=breadth_first)
        brute_dist, brute_ind = brute_force_neighbors(X, Y, k, metric,
                                                      **kwargs)

        # don't check indices here: if there are any duplicate distances,
        # the indices may not match.  Distances should not have this problem.
        assert_allclose(tree_dist, brute_dist)
示例#6
0
def petrol_bunk():
    """Load petrol-bunk coordinates from 'petrol_bunk.txt' and index them.

    Each line of the file is a comma-separated coordinate vector; every
    entry of ``points`` is ``[coordinates, line_index]``.

    :returns: tuple ``(KDTree(points), points)``
    """
    points = []
    # Context manager guarantees the file handle is closed
    # (the original left it open).
    with open('petrol_bunk.txt', 'r') as infile:
        for idx, line in enumerate(infile):
            coords = list(map(float, line.rstrip().split(",")))
            points.append([coords, idx])

    return KDTree(points), points
def test_kd_search(points: List[Point],
                   rectangles: List[Rectangle]) -> List[float]:
    """Time a KDTree range search for every rectangle; seconds per query."""
    tree = KDTree(points)

    def time_individual(rectangle: Rectangle) -> float:
        # Unpack the query window and time a single range search.
        min_x, max_x, min_y, max_y = rectangle.to_tuple()
        begin = default_timer()
        tree.search(min_x, max_x, min_y, max_y)
        return default_timer() - begin

    return [time_individual(rect) for rect in rectangles]
示例#8
0
def hospitals():
    """Load hospital coordinates from 'hospitals.txt' and index them.

    Each line of the file is a comma-separated coordinate vector; every
    entry of ``points`` is ``[coordinates, line_index]``.

    :returns: tuple ``(KDTree(points), points)``
    """
    points = []
    # Context manager guarantees the file handle is closed
    # (the original left it open).
    with open('hospitals.txt', 'r') as infile:
        for idx, line in enumerate(infile):
            coords = list(map(float, line.rstrip().split(",")))
            points.append([coords, idx])

    return KDTree(points), points
示例#9
0
def test_kd_tree_pickle():
    """Yield checks that a KDTree survives a pickle round-trip (protocols 0-2)."""
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    tree = KDTree(X, leaf_size=1)
    ind1, dist1 = tree.query(X)

    def check_pickle_protocol(protocol):
        # Serialize, restore, and verify queries still agree.
        restored = pickle.loads(pickle.dumps(tree, protocol=protocol))
        ind2, dist2 = restored.query(X)
        assert_allclose(ind1, ind2)
        assert_allclose(dist1, dist2)

    for protocol in range(3):
        yield check_pickle_protocol, protocol
示例#10
0
def test_kd_tree_two_point(n_samples=100, n_features=3):
    """Yield checks that two-point correlation matches a brute-force count."""
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    kdt = KDTree(X, leaf_size=10)

    # Brute-force reference: count pairs within each radius.
    dist_matrix = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [np.sum(dist_matrix <= ri) for ri in r]

    def check_two_point(r, dualtree):
        counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counts, counts_true)

    for dualtree in (True, False):
        yield check_two_point, r, dualtree
示例#11
0
    def test_random(self):
        """Plot points near (3, 3) from one Gaussian cloud and show the
        50 nearest neighbors of a query point."""
        count, sigma1, sigma2 = 10000, 0.6, 0.5

        np.random.seed(0)
        xs = np.random.normal(3, sigma1, count)
        ys = np.random.normal(3, sigma2, count)

        point = [3.01, 3.01]
        # Only draw points inside a small window around the query point.
        for xi, yi in zip(xs, ys):
            if 2.98 < xi < 3.03 and 2.98 < yi < 3.03:
                ax.scatter(xi, yi, c='b', marker='s', s=10, alpha=0.7)
        points = np.c_[xs, ys]

        tree = KDTree(points)
        show_closest(tree, point, 50, 'm')
        plt.show()
示例#12
0
def test_kd_tree_query_radius(n_samples=100, n_features=10):
    """query_radius must return exactly the indices within radius r."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    tree = KDTree(X, leaf_size=5)
    dist_to_query = np.sqrt(np.sum((X - query_pt) ** 2, axis=1))

    for r in np.linspace(dist_to_query[0], dist_to_query[-1], 100):
        found = np.sort(tree.query_radius(query_pt, r + eps)[0])
        expected = np.sort(np.where(dist_to_query <= r + eps)[0])

        assert_allclose(expected, found)
示例#13
0
def test_kd_tree_query_radius_distance(n_samples=100, n_features=10):
    """Distances returned by query_radius must match direct computation."""
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    tree = KDTree(X, leaf_size=5)
    radii = np.sqrt(np.sum((X - query_pt) ** 2, axis=1))

    for r in np.linspace(radii[0], radii[-1], 100):
        ind, dist = tree.query_radius(query_pt, r + eps, return_distance=True)
        ind, dist = ind[0], dist[0]

        # Recompute distances for the returned indices by hand.
        expected = np.sqrt(np.sum((query_pt - X[ind]) ** 2, axis=1))

        assert_allclose(expected, dist)
示例#14
0
def main():
    """Benchmark KD-tree nearest-neighbour search against a linear scan.

    Runs ``test_times`` random trials, asserts both methods find a
    neighbour at the same distance, and prints cumulative timings.
    """
    print("Testing KD Tree...")
    test_times = 100
    run_time_1 = run_time_2 = 0

    for _ in range(test_times):
        # Generate random data.
        low = 0
        high = 100
        n_rows = 1000
        n_cols = 2
        X = gen_data(low, high, n_rows, n_cols)
        y = gen_data(low, high, n_rows)
        Xi = gen_data(low, high, n_cols)

        # Build the KD-tree.
        tree = KDTree()
        tree.build_tree(X, y)

        # KD-tree search.
        start = time()
        nd = tree.nearest_neighbour_search(Xi)
        run_time_1 += time() - start
        ret1 = get_eu_dist(Xi, nd.split[0])

        # Plain linear search.
        start = time()
        row = exhausted_search(X, Xi)
        run_time_2 += time() - start
        ret2 = get_eu_dist(Xi, row)

        # Compare results (diagnostic message typo "restult" fixed).
        assert ret1 == ret2, "target:%s\nresult1:%s\nresult2:%s\ntree:\n%s" % (
            Xi, nd, row, tree)

    print("%d tests passed!" % test_times)
    print("KD Tree Search %.2f s" % run_time_1)
    print("Exhausted search %.2f s" % run_time_2)
    def test_randoms(self):
        """Draw three Gaussian clusters and highlight the closest points
        to a random query near the first cluster."""
        count, sigma1, sigma2 = 500, 0.6, 0.5

        np.random.seed(0)
        # Cluster centres; the draw order matches the original so the
        # seeded RNG produces identical samples.
        centers = [(5, 5), (3, 4), (4.5, 2.5)]
        xs, ys = [], []
        for cx, cy in centers:
            xs.append(np.random.normal(cx, sigma1, count))
            ys.append(np.random.normal(cy, sigma2, count))

        # Random query point near the first cluster.
        point = [np.random.normal(5, 0.6), np.random.normal(5, 0.5)]

        ax.scatter(xs[0], ys[0], c='b', marker='s', s=10, alpha=0.7)
        ax.scatter(xs[1], ys[1], c='r', marker='^', s=10, alpha=0.7)
        ax.scatter(xs[2], ys[2], c='g', s=10, alpha=0.7)

        points = np.c_[np.concatenate(xs), np.concatenate(ys)]

        tree = KDTree(points)
        show_closest(tree, point, 'm')
        plt.show()
def test_should_find_n_closest_7():
    """The three nearest 2-D points to (7, 1) are indices 5, 4 and 1."""
    tree = KDTree(dataset_2d)
    _, indices = tree.query(np.array([7, 1]), n=3)
    assert indices == [5, 4, 1]
def test_should_find_closest_6():
    """The nearest 3-D point to (8, 8, 8) is index 4."""
    tree = KDTree(dataset_3d)
    _, indices = tree.query(np.array([8, 8, 8]), n=1)
    assert indices == [4]
def test_should_find_closest_5():
    """The nearest 3-D point to (4, 5, 6) is index 2."""
    tree = KDTree(dataset_3d)
    _, indices = tree.query(np.array([4, 5, 6]), n=1)
    assert indices == [2]
def test_should_find_closest_4():
    """The nearest 3-D point to (1, 2, 3) is index 3."""
    tree = KDTree(dataset_3d)
    _, indices = tree.query(np.array([1, 2, 3]), n=1)
    assert indices == [3]
示例#20
0
def build_tree(pixels):
    """Insert every pixel into a fresh 3-dimensional KD-tree and return it."""
    tree = KDTree(3)
    for pixel in pixels:
        tree.insert(pixel)
    return tree
示例#21
0
 def fit(self, training_set, labels):
     """Build the neighbor-search backend selected by ``self.algorithm``."""
     self.handler = (KDTree(training_set, labels)
                     if self.algorithm == 'kd_tree'
                     else BruteSearch(training_set, labels))
def test_should_find_closest_1():
    """The nearest 2-D point to (9, 2) is index 4."""
    tree = KDTree(dataset_2d)
    _, indices = tree.query(np.array([9, 2]), n=1)
    assert indices == [4]
def test_should_find_n_closest_8():
    """The three nearest 2-D points to (3, 2) are indices 0, 1 and 5."""
    tree = KDTree(dataset_2d)
    _, indices = tree.query(np.array([3, 2]), n=3)
    assert indices == [0, 1, 5]
示例#24
0
 def setUp(self):
     """Build a KDTree over six fixed 2-D points used by the tests."""
     fixture = [(2, 3), (5, 4), (9, 6), (4, 7), (8, 1), (7, 2)]
     self.tree = KDTree(fixture)
示例#25
0
    #      [3,4,6,1],
    #      [3,6,6,1],
    #      [3,5,9,2],
    #      [3,5,12,2],
    #      [3,5,13,2],]

    X = [[2, 3, 1], [5, 4, 1], [9, 6, 1], [8.5, 6, 1], [4, 7, 1], [8, 1, 1],
         [7, 2, 1]]

    X = np.array(X)

    # data_train = pd.read_csv('./data_set/iris_1.csv', header=0)
    # train_data = np.array(data_train)

    # X = train_data[:, :-1]
    # y = train_data[:, -1]

    # X_train, X_test, y_train, y_true = train_test_split(X, y,test_size=1 / 3., random_state=6)
    #
    # train_set = np.column_stack((X_train, y_train))

    kd = KDTree()
    kd.build_tree(X)

    x = [[7, 6, 1], [3, 4.5, 1]]
    test_x = np.array(x)
    # print(test_x[:,:-1])
    nearest = kd.search_neighbour(test_x)
    for i in range(len(test_x)):
        print(test_x[i], '--->', nearest[i])
示例#26
0
def test_kd_buildup(points: List[Point]) -> float:
    """Return the wall-clock seconds needed to construct a KDTree."""
    begin = default_timer()
    KDTree(points)
    return default_timer() - begin
def test_should_find_n_closest_9():
    """The three nearest 3-D points to (4, 5, 6) are indices 2, 3 and 1."""
    tree = KDTree(dataset_3d)
    _, indices = tree.query(np.array([4, 5, 6]), n=3)
    assert indices == [2, 3, 1]  # ????? fails!  (original note, translated)
def main():
    """Demo: query the nearest neighbour of (9, 2) in a small 2-D set."""
    data = np.array([[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]])
    tree = KDTree(data)
    dist, ind = tree.query(np.array([9, 2]), n=1)
示例#29
0
 def setUp(self):
     """Build a 1-D KDTree over ten seeded random integers in [0, 50)."""
     np.random.seed(0)
     values = np.random.randint(0, 50, 10)
     # Reshape to a column vector: one coordinate per point.
     self.tree = KDTree(values.reshape(-1, 1))
示例#30
0
 def __create_kdtree(self):
     """Rebuild the KD-tree from every cluster's representative points.

     Each representative point is inserted with a back-reference to the
     cluster it belongs to.
     """
     self.__KDTree_T = KDTree()
     for cluster in self.__heap_q:
         for rep in cluster.rep:
             self.__KDTree_T.insert(rep, cluster)