Python KDTree.query示例，sklearn.neighbors.kd_tree.KDTree.query Python示例

示例#1

0

显示文件

def generate_pairs(patches, constants):
    """Pair query patches with their nearest candidate patches.

    For every entry of ``patches`` (one list of patch objects per scaled
    image), patches whose ``bucket`` lies in [7, 9] are used as queries and
    patches whose ``bucket`` lies in [0, 5] are used as candidates. All
    candidates are indexed in a single KD-tree and each query is matched to
    its ``constants.K_NEAREST`` nearest candidates.

    Args:
        patches: sequence of per-image patch lists; each patch exposes
            ``norm_patch`` and ``bucket``.
        constants: object exposing ``K_NEAREST`` and ``NUM_QUERY_PATCHES``.

    Returns:
        list of ``[query_index, neighbour_index]`` pairs. ``query_index`` is
        the position of the query patch counted over all of ``patches``;
        ``neighbour_index`` is a row of the concatenated candidate array.
    """
    neighbours_per_query = constants.K_NEAREST
    max_queries = constants.NUM_QUERY_PATCHES
    num_scales = len(patches)

    query_database = []
    candidate_database = []
    index_database = []
    for scale in range(num_scales):
        # (position inside this image, normalised patch) for every query patch
        eligible = [(position, patch.norm_patch)
                    for position, patch in enumerate(patches[scale])
                    if 7 <= patch.bucket <= 9]

        # Too many queries: keep a fixed-seed random subset to improve speed,
        # preserving the original patch order.
        if len(eligible) > max_queries:
            np.random.seed(0)
            chosen = np.random.choice(np.arange(len(eligible)),
                                      max_queries,
                                      replace=False).tolist()
            chosen.sort()
            eligible = [eligible[c] for c in chosen]

        query_indices = [position for position, _ in eligible]
        query_patches = [norm_patch for _, norm_patch in eligible]

        query_database.append(np.vstack([query_patches]))
        index_database.append(query_indices)
        candidate_database.append(
            np.vstack([[
                patch.norm_patch for patch in patches[scale]
                if 0 <= patch.bucket <= 5
            ]]))

    tree = KDTree(np.concatenate(candidate_database),
                  leaf_size=30,
                  metric='euclidean')

    # `offset` shifts per-image query positions into a numbering that runs
    # over all images.
    # NOTE(review): neighbour indices refer to rows of the candidate array,
    # not to that same numbering -- confirm callers expect this.
    pairs = []
    offset = 0
    for scale in range(num_scales):
        neighbours = tree.query(query_database[scale],
                                k=neighbours_per_query,
                                return_distance=False,
                                sort_results=False)
        global_ids = [offset + local for local in index_database[scale]]
        for row_number, row in enumerate(neighbours):
            pairs.extend([global_ids[row_number], row[col]]
                         for col in range(neighbours_per_query))
        offset += len(patches[scale])

    return pairs

示例#2

0

显示文件

def plot_nb_dists(X, nearest_neighbor, metric='euclidean', ylim=None):
    """Plot, for each requested k, the sorted distance to the k-th neighbor.

    Args:
        X (list of lists): list with data tuples
        nearest_neighbor (int or list/tuple of int): rank(s) of the nearest
            neighbor to plot
        metric (string): name of the distance metric handed to ``KDTree``
        ylim (float, optional): upper limit of the y axis; the lower limit is
            always 0. ``None`` is passed through to ``plt.ylim`` unchanged.
    """
    # The metric argument was previously accepted but never forwarded.
    tree = KDTree(X, leaf_size=2, metric=metric)

    if not isinstance(nearest_neighbor, (list, tuple)):
        nearest_neighbor = [nearest_neighbor]

    max_nn = max(nearest_neighbor)

    # X is queried against itself, so one extra neighbor is requested and the
    # requested rank `nnb` is read from column `nnb` (column 0 is skipped).
    dist, _ = tree.query(X, k=max_nn + 1)

    plt.figure()

    for nnb in nearest_neighbor:
        col = dist[:, nnb]
        col.sort()  # in-place sort of this column of `dist`
        plt.plot(col, label="{}th nearest neighbor".format(nnb))

    plt.ylabel("Distance to k nearest neighbor")
    plt.xlabel("Points sorted according to distance of k nearest neighbor")
    plt.ylim(0, ylim)
    plt.grid()
    plt.legend()
    plt.show()

示例#3

0

显示文件

文件： test_kd_tree.py 项目： kinnskogr/scikit-learn

    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Compare KD-tree k-NN distances of Y in X against brute force.

        Only distances are compared: when several points are equidistant the
        returned indices may legitimately differ between the two methods.
        """
        tree = KDTree(X, leaf_size=1, metric=metric, **kwargs)
        tree_dist, _ = tree.query(
            Y, k, dualtree=dualtree, breadth_first=breadth_first)
        brute_dist, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

        assert_allclose(tree_dist, brute_dist)

示例#4

0

显示文件

    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Assert that KDTree.query and brute force agree on distances.

        Indices are deliberately ignored, because duplicate distances can be
        returned in a different order by the two implementations.
        """
        query_options = dict(dualtree=dualtree, breadth_first=breadth_first)
        kd_distances, _kd_indices = KDTree(
            X, leaf_size=1, metric=metric, **kwargs).query(Y, k, **query_options)
        bf_distances, _bf_indices = brute_force_neighbors(
            X, Y, k, metric, **kwargs)

        assert_allclose(kd_distances, bf_distances)

示例#5

0

显示文件

    def __call__(self, x, ma):
        """Estimate one Q-value per action from nearest stored memories.

        The input is embedded with ``self.l0`` + tanh. For every action ``j``
        a KD-tree is built over the stored embeddings ``ma.mah[j]``, the (up
        to) 50 nearest ones are looked up, and their stored values
        ``ma.maq[j]`` are averaged with weights
        ``1 / (squared_distance + 0.001)``.

        Args:
            x: network input, fed to ``self.l0``.
            ma: memory object exposing ``mah`` (per-action stored embeddings)
                and ``maq`` (per-action stored values).

        Returns:
            tuple ``(action_value, q_train, ind_list, dist_list, h.data)``:
            a ``DiscreteActionValue`` built from ``self.q_list``, the
            per-action Q variables, the per-action neighbour indices and
            distances from ``KDTree.query``, and the raw embedding.
        """
        h = F.tanh(self.l0(x))

        q_train = []  # per-action Q variables, kept for training
        ind_list = []  # per-action neighbour indices, kept for training
        dist_list = []  # per-action neighbour distances, kept for training
        for j in range(len(ma.maq)):  # loop over actions
            h_list = ma.mah[j]
            lp = len(h_list)
            # Floor division keeps leaf_size an integer on Python 3 as well;
            # `lp / 2` yields a float there, which KDTree does not accept.
            leaf_size = lp + lp // 2

            tree = KDTree(h_list, leaf_size=leaf_size)
            # Cannot ask for more neighbours than there are stored entries.
            k = min(lp, 50)
            dist, ind = tree.query(h.data, k=k)

            w = None  # running sum of weights
            qq = None  # running weighted sum of stored values
            for ii in ind[0]:
                # assumes stored embeddings have 4 components -- TODO confirm
                mahi = np.zeros((1, 4), dtype=np.float32)
                mahi[0] = ma.mah[j][ii]
                hi = chainer.Variable(cuda.to_cpu(mahi))
                wi = F.expand_dims(
                    1 / (F.batch_l2_norm_squared((h - hi)) + 0.001), 1)

                maqi = np.zeros((1, 1), dtype=np.float32)
                maqi[0] = ma.maq[j][ii]
                q = chainer.Variable(cuda.to_cpu(maqi))

                if w is None:
                    w = wi
                    qq = wi * q
                else:
                    w += wi
                    qq += wi * q
            qq /= w

            q_train.append(qq)
            ind_list.append(ind)
            dist_list.append(dist)
            self.q_list[0][j] = qq.data[0][0]
        qa = chainer.Variable(cuda.to_cpu(self.q_list))
        return chainerrl.action_value.DiscreteActionValue(
            qa), q_train, ind_list, dist_list, h.data

示例#6

0

显示文件

文件： test_kd_tree.py 项目： AlexisMignon/scikit-learn

def test_kd_tree_pickle(protocol):
    """Check that a KDTree answers queries identically after pickling.

    A tree over 10 random 3-D points is serialised with the given pickle
    ``protocol``, restored, and both trees are queried with the same points.
    """
    import pickle

    rng = check_random_state(0)
    X = rng.random_sample((10, 3))

    original_tree = KDTree(X, leaf_size=1)
    first_before, second_before = original_tree.query(X)

    restored_tree = pickle.loads(pickle.dumps(original_tree, protocol=protocol))
    first_after, second_after = restored_tree.query(X)

    # Both outputs of query() must survive the round trip.
    assert_array_almost_equal(first_before, first_after)
    assert_array_almost_equal(second_before, second_after)

示例#7

0

显示文件

文件： test_kd_tree.py 项目： hongsumfyp/scikit-learn-circle

def test_kd_tree_pickle(protocol):
    """Verify a pickle round trip leaves KDTree query results unchanged.

    Args:
        protocol: pickle protocol number used for serialisation.
    """
    import pickle

    points = check_random_state(0).random_sample((10, 3))
    reference_tree = KDTree(points, leaf_size=1)
    reference_results = reference_tree.query(points)

    def _round_trip(tree):
        """Serialise and immediately deserialise ``tree``."""
        return pickle.loads(pickle.dumps(tree, protocol=protocol))

    restored_results = _round_trip(reference_tree).query(points)

    # query() returns two arrays; compare them pairwise, in order.
    expected_first, expected_second = reference_results
    actual_first, actual_second = restored_results
    assert_array_almost_equal(expected_first, actual_first)
    assert_array_almost_equal(expected_second, actual_second)

示例#8

0

显示文件

文件： test_kd_tree.py 项目： 0x0all/scikit-learn

def test_kd_tree_pickle():
    """Yield one pickle round-trip check per pickle protocol 0, 1 and 2.

    Each yielded ``(check, protocol)`` pair verifies that a KDTree restored
    from a pickle returns the same query results as the original tree.
    """
    import pickle

    np.random.seed(0)
    X = np.random.random((10, 3))
    reference_tree = KDTree(X, leaf_size=1)
    ref_first, ref_second = reference_tree.query(X)

    def check_pickle_protocol(protocol):
        restored = pickle.loads(pickle.dumps(reference_tree, protocol=protocol))
        new_first, new_second = restored.query(X)
        assert_array_almost_equal(ref_first, new_first)
        assert_array_almost_equal(ref_second, new_second)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol

示例#9

0

显示文件

def test_kd_tree_pickle():
    """Generate pickle round-trip checks for protocols 0, 1 and 2.

    The yielded checker rebuilds the tree from its pickle and asserts that
    both arrays returned by ``query`` match those of the original tree.
    """
    import pickle

    np.random.seed(0)
    X = np.random.random((10, 3))
    source_tree = KDTree(X, leaf_size=1)
    baseline = source_tree.query(X)

    def check_pickle_protocol(protocol):
        payload = pickle.dumps(source_tree, protocol=protocol)
        reloaded_tree = pickle.loads(payload)
        reloaded = reloaded_tree.query(X)
        base_first, base_second = baseline
        new_first, new_second = reloaded
        assert_allclose(base_first, new_first)
        assert_allclose(base_second, new_second)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol

示例#10

0

显示文件

    def __call__(self, x, ma):
        """Estimate one Q-value per action from nearest stored memories.

        The input goes through ``l0``, ``l1`` and ``l2`` with tanh
        activations. For each action ``j`` the (up to) 50 stored embeddings
        in ``ma.mah[j]`` closest to the current embedding are found with a
        KD-tree, and their stored values ``ma.maq[j]`` are averaged with
        weights ``1 / sqrt(squared_distance + 1e-3)``.

        Args:
            x: network input, fed to ``self.l0``.
            ma: memory object exposing ``mah`` (per-action stored embeddings)
                and ``maq`` (per-action stored values).

        Returns:
            tuple ``(qa, q_train, ind_list, dist_list, h.data)`` where ``qa``
            is ``self.q_list`` (wrapped in a ``chainer.Variable`` when
            ``self.use_gpu`` is set), followed by the per-action Q variables,
            neighbour indices, neighbour distances and the raw embedding.
        """
        h = F.tanh(self.l0(x))
        h = F.tanh(self.l1(h))
        h = F.tanh(self.l2(h))

        q_train = []  # per-action Q variables, kept for training
        ind_list = []  # per-action neighbour indices, kept for training
        dist_list = []  # per-action neighbour distances, kept for training
        for j in range(len(ma.maq)):  # loop over actions
            h_list = ma.mah[j]
            lp = len(h_list)
            # Floor division keeps leaf_size an integer on Python 3 as well;
            # `lp / 2` yields a float there, which KDTree does not accept.
            leaf_size = lp + lp // 2

            tree = KDTree(h_list, leaf_size=leaf_size)
            h_ = h.data

            # Cannot ask for more neighbours than there are stored entries.
            k = min(lp, 50)
            dist, ind = tree.query(h_, k=k)

            # Gather all neighbours of the first query row at once.
            mahi = ma.mah[j][ind[0]]
            hi = chainer.Variable(cuda.to_cpu(mahi))
            # Repeat the embedding once per neighbour so the difference below
            # is computed row by row.
            tiled_h = chainer.Variable(np.tile(h.data, (len(ind[0]), 1)))
            wi = F.expand_dims(
                1 /
                (F.sqrt(F.sum((tiled_h - hi) *
                              (tiled_h - hi), axis=1) + 1e-3)), 1)
            w = F.sum(wi, axis=0)
            maqi = ma.maq[j][ind[0]]
            q = chainer.Variable(cuda.to_cpu(maqi))
            # Weighted average of the neighbours' stored values.
            qq = F.expand_dims(F.sum(wi * q, axis=0) / w, 1)

            q_train.append(qq)
            ind_list.append(ind)
            dist_list.append(dist)

            self.q_list[0][j] = qq.data
        if self.use_gpu:
            qa = chainer.Variable(cuda.to_cpu(self.q_list))
        else:
            qa = self.q_list

        return qa, q_train, ind_list, dist_list, h.data

示例#11

0

显示文件

def generate_pairs_raw(patches, constants):
    """Find the 2 nearest raw patches for every patch of the first image.

    Only ``patches[0]`` is used and ``constants`` is not read. Each patch's
    ``raw_patch`` is flattened, all of them are indexed in a KD-tree, and the
    same set is queried against it; two neighbours are requested because of
    duplicates (each patch is also present in the tree itself).

    Returns:
        array of neighbour indices as returned by ``KDTree.query`` with
        ``return_distance=False``.
    """
    # One flattened raw patch per row.
    flattened = np.vstack(
        [np.reshape(patch.raw_patch, [-1]) for patch in patches[0]])

    tree = KDTree(np.concatenate([flattened]),
                  leaf_size=30,
                  metric='euclidean')
    neighbour_indices = tree.query(flattened,
                                   k=2,
                                   return_distance=False,
                                   sort_results=False)

    return np.concatenate([neighbour_indices])

示例#12

0

显示文件

 def write(self, h, v):
     """Store value ``v`` under key ``h`` in the memory.

     If a stored key is at distance exactly 0 from ``h``, its value is moved
     towards ``v`` by the step ``self.lr`` and nothing else changes.
     Otherwise the pair is appended while there is room, or it overwrites
     the slot with the smallest entry in ``self.ages`` once
     ``self.capacity`` is reached. The age of the written slot is reset
     to 0.

     Args:
         h: key (encoded state) to write.
         v: value associated with ``h``.
     """
     if len(self.memory_keys) > 0:
         keys = np.array(self.memory_keys, dtype=np.float32)
         tree = KDTree(keys, leaf_size=50)
         distance, index = tree.query(np.array([h], dtype=np.float32))
         if distance[0][0] == 0:
             # Exact key already stored: update its value in place.
             index = index[0][0]
             self.memory_values[index] += self.lr * (v - self.memory_values[index])
             return
     if len(self.memory_values) < self.capacity:
         # The new entry lands at index len(self.memory_values); reset that
         # slot's age (previously the slot before it was reset instead).
         # assumes self.ages has at least `capacity` slots -- TODO confirm
         self.ages[len(self.memory_values)] = 0
         self.memory_keys.append(h)
         self.memory_values.append(v)
     else:
         # Memory full: replace the slot with the smallest age.
         index = np.argmin(self.ages)
         self.memory_keys[index] = h
         self.memory_values[index] = v
         self.ages[index] = 0

示例#13

0

显示文件

 def lookup(self, h):
     """Return the nearest stored keys and their values for each query.

     Args:
         h: sequence of encoded states (each a sequence of numbers).

     Returns:
         tuple ``(queried_keys, queried_values)`` of float32 arrays with
         shapes ``(len(h), k, len(h[0]))`` and ``(len(h), k)``, where ``k``
         is ``self.p`` capped at the number of stored keys. With an empty
         memory both arrays are all zeros and ``k`` is 1.

     Side effects:
         For every query, all entries of ``self.ages`` are incremented and
         the entries of the returned neighbours are reset to 0.
     """
     if len(self.memory_values) == 0:
         return np.zeros((len(h), 1, len(h[0])), dtype=np.float32), np.zeros((len(h), 1), dtype=np.float32)
     keys = np.array(self.memory_keys, dtype=np.float32)
     values = np.array(self.memory_values, dtype=np.float32)
     # Cannot ask for more neighbours than there are stored keys.
     k = min(keys.shape[0], self.p)
     queried_keys = np.zeros((len(h), k, len(h[0])), dtype=np.float32)
     queried_values = np.zeros((len(h), k), dtype=np.float32)
     # The keys do not change during the loop, so build the tree only once.
     tree = KDTree(keys, leaf_size=50)
     for i, encoded_state in enumerate(h):
         _, indices = tree.query(np.array([encoded_state], dtype=np.float32), k=k)
         # `indices` has a leading axis of length 1 (single query row).
         queried_keys[i] = keys[indices]
         queried_values[i] = values[indices][-1]
         self.ages += 1
         self.ages[indices] = 0
     return queried_keys, queried_values

示例#14

0

显示文件

文件： KDTree_query.py 项目： rysnee/DoAn

from VLADlib.Descriptors import *

# Script: index a pickled collection of vectors in a KD-tree, describe one
# image as a bag-of-visual-words histogram, and print the indices of its 3
# nearest neighbours in the tree.
# NOTE(review): pickle, np, cv2, KDTree, BagOfVisualWords and describeORB are
# presumably provided by the star import above -- confirm.

# Load the pickled collection that will be indexed.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
pathVD = "visualWords/visualWords.pickle"
with open(pathVD, 'rb') as f:
    vocab = pickle.load(f)

# Each entry exposes .toarray(); its first row becomes one row of `training`.
training = np.asarray([i.toarray()[0].tolist() for i in vocab])
tree = KDTree(training, leaf_size=2)

# Query image.
image = 'dataset/3.jpg'
im = cv2.imread(image)

# Load the pickled visual dictionary and build the bag-of-words encoder from
# its cluster centers (pathVD is reused for this second file).
pathVD = 'visualDictionary/visualDictionary2ORB.pickle'
with open(pathVD, 'rb') as g:
    visualDictionary = pickle.load(g)

bovw = BagOfVisualWords(visualDictionary.cluster_centers_)

# Compute the descriptors of the query image.
kp, descriptor = describeORB(im)

# Represent the descriptors as a bag-of-words histogram, then flatten its
# first row into a 1-D query vector.
hist = bovw.describe(descriptor)
query = np.asarray(hist.toarray()[0].tolist())

print("Query = ", query)

# KDTree.query expects a 2-D array, hence the reshape to a single row.
dist, ind = tree.query(query.reshape(1, -1), k=3)
print(ind)

示例#15

0

显示文件

文件： manipulations2.py 项目： airblair94/data_ex

#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

# Build a numeric matrix with one row per record and one column per header,
# index it in a KD-tree and query every row for its 3 nearest neighbours.
headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

# The number of rows is taken from the first column; compute it once instead
# of re-reading that column for every loop.
num_rows = len(gd.get_data_slice(headers[0], dicts))

# Every header is read through the same numeric accessor. The former
# `if i == 1 or i == 4` test selected between two identical calls, so it has
# been dropped.
# NOTE(review): if columns 1 and 4 were meant to be read differently, that
# special case still needs to be implemented.
rows_lol = [[] for _ in range(num_rows)]
for header in headers:
    column = gd.get_data_slice_numbers(header, dicts)
    for j in range(num_rows):
        rows_lol[j].append(column[j])

X = np.array(rows_lol)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
# The indices returned by query() are not stored or used.
kdt.query(X, k=3, return_distance=False)