Example #1
import matplotlib.pyplot as plt
from sklearn.neighbors import KDTree


def plot_nb_dists(X, nearest_neighbor, metric='euclidean', ylim=None):
    """Plot each point's distance to its `nearest_neighbor`th neighbor, sorted.

    Args:
        X (list of lists): list with data tuples
        nearest_neighbor (int or list of int): which nearest neighbor(s) to plot
        metric (string): name of the distance metric to use
        ylim (float): optional upper limit for the y-axis
    """

    tree = KDTree(X, leaf_size=2, metric=metric)

    if not isinstance(nearest_neighbor, list):
        nearest_neighbor = [nearest_neighbor]

    max_nn = max(nearest_neighbor)

    dist, _ = tree.query(X, k=max_nn + 1)

    plt.figure()

    for nnb in nearest_neighbor:
        col = dist[:, nnb]
        col.sort()
        plt.plot(col, label="{}th nearest neighbor".format(nnb))

    #plt.ylim(0, min(250, max(dist[:, max_nn])))
    plt.ylabel("Distance to k nearest neighbor")
    plt.xlabel("Points sorted according to distance of k nearest neighbor")
    plt.ylim(0, ylim)
    plt.grid()
    plt.legend()
    plt.show()
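A k-distance plot like this is most often used to eyeball an elbow for DBSCAN's eps. A minimal usage sketch, assuming the imports above; the two-blob data here is purely illustrative, not from the original:

import numpy as np

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.normal(0, 1, (100, 2)),
                    rng.normal(5, 1, (100, 2))])  # two synthetic clusters

# sorted distance of every point to its 4th nearest neighbor
plot_nb_dists(X_demo, nearest_neighbor=4)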
Example #2
def generate_pairs(patches, constants):
    """Generate pairs for normalized patches."""
    k_nearest = constants.K_NEAREST
    num_patches = constants.NUM_QUERY_PATCHES
    scaled_imgs = len(patches)

    pairs = []
    query_database = []
    candidate_database = []
    index_database = []
    length_database = []
    for k in range(scaled_imgs):
        qp = [
            patch.norm_patch for patch in patches[k] if 7 <= patch.bucket <= 9
        ]
        qi = [
            index for index, patch in enumerate(patches[k])
            if 7 <= patch.bucket <= 9
        ]

        # Randomly subsample the query patches to improve speed
        if len(qi) > num_patches:
            np.random.seed(0)
            selection = np.random.choice(np.arange(len(qi)),
                                         num_patches,
                                         replace=False).tolist()
            selection.sort()
            query_patches = [qp[i] for i in selection]
            query_indices = [qi[i] for i in selection]
        else:
            query_patches = qp
            query_indices = qi

        query_database.append(np.vstack([query_patches]))
        index_database.append(query_indices)
        length_database.append(len(query_indices))
        candidate_database.append(
            np.vstack([[
                patch.norm_patch for i, patch in enumerate(patches[k])
                if 0 <= patch.bucket <= 5
            ]]))

    p1 = np.concatenate(candidate_database)
    kdt = KDTree(p1, leaf_size=30, metric='euclidean')

    # Find list of nearest neighbours for each patch
    # `total` is used to correct indices of queried patches for every iteration
    total = 0
    for k in range(scaled_imgs):
        nn = kdt.query(query_database[k],
                       k=k_nearest,
                       return_distance=False,
                       sort_results=False)
        q = [total + index_database[k][i] for i in range(length_database[k])]
        for i in range(len(nn)):
            for j in range(k_nearest):
                pairs.append([q[i], nn[i][j]])
        total += len(patches[k])

    return pairs
Example #3
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        kdt = KDTree(X, leaf_size=1, metric=metric, **kwargs)
        dist1, ind1 = kdt.query(Y, k, dualtree=dualtree,
                                breadth_first=breadth_first)
        dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # don't check indices here: if there are any duplicate distances,
        # the indices may not match.  Distances should not have this problem.
        assert_allclose(dist1, dist2)
Example #5
    def __call__(self, x, ma):
        h = F.tanh(self.l0(x))
        #h = F.tanh(self.l1(h))
        #h = F.tanh(self.l2(h))

        # kd_tree
        q_train = []  # for train [variable, variable]
        ind_list = []  # for train
        dist_list = []  # for train
        for j in range(len(ma.maq)):  # loop over n_actions
            h_list = ma.mah[j]
            lp = len(h_list)
            leaf_size = lp + (lp // 2)  # integer leaf_size (lp / 2 is a float on Python 3)

            tree = KDTree(h_list, leaf_size=leaf_size)
            h_ = h.data

            if lp < 50:
                k = lp
            else:
                k = 50
            dist, ind = tree.query(h_, k=k)

            count = 0
            for ii in ind[0]:
                mahi = np.zeros((1, 4), dtype=np.float32)
                mahi[0] = ma.mah[j][ii]
                hi = chainer.Variable(cuda.to_cpu(mahi))
                wi = F.expand_dims(
                    1 / (F.batch_l2_norm_squared((h - hi)) + 0.001), 1)

                if count == 0:
                    w = wi
                    maqi = np.zeros((1, 1), dtype=np.float32)
                    maqi[0] = ma.maq[j][ii]
                    q = chainer.Variable(cuda.to_cpu(maqi))
                    qq = wi * q
                    count += 1
                else:
                    w += wi
                    maqi = np.zeros((1, 1), dtype=np.float32)
                    maqi[0] = ma.maq[j][ii]
                    q = chainer.Variable(cuda.to_cpu(maqi))
                    qq += wi * q
            qq /= w

            q_train.append(qq)
            ind_list.append(ind)
            dist_list.append(dist)
            self.q_list[0][j] = qq.data[0][0]
        qa = chainer.Variable(cuda.to_cpu(self.q_list))
        return chainerrl.action_value.DiscreteActionValue(
            qa), q_train, ind_list, dist_list, h.data
Example #6
def test_kd_tree_two_point(dualtree):
    n_samples, n_features = (100, 3)
    rng = check_random_state(0)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    kdt = KDTree(X, leaf_size=10)

    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(D <= ri).sum() for ri in r]

    counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree)
    assert_array_almost_equal(counts, counts_true)
Example #8
    def eps_neighbourhood(X, index, eps, metric):
        """
        Query for neighbors within a given radius.

        :param X: data
        :param index: index position of point in data
        :param eps: looking for points inside radius eps
        :param metric: distance metric
        :return: vector of indices
        """
        tree = KDTree(X, leaf_size=2, metric=metric)
        indices = tree.query_radius([X[index]], r=eps)
        return indices[0]
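Note that the helper rebuilds the tree on every call, which is wasteful when the eps-neighbourhood of many points is needed (as in DBSCAN). A minimal sketch of building once and batching the queries; the function name is an assumption, not from the original:

    def all_eps_neighbourhoods(X, eps, metric='euclidean'):
        # build the tree once, then fetch every point's eps-neighbourhood in one call
        tree = KDTree(X, leaf_size=2, metric=metric)
        return tree.query_radius(X, r=eps)  # one array of indices per point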
Example #9
def test_kd_tree_pickle(protocol):
    import pickle
    rng = check_random_state(0)
    X = rng.random_sample((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)  # KDTree.query returns (distances, indices)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_array_almost_equal(ind1, ind2)
        assert_array_almost_equal(dist1, dist2)

    check_pickle_protocol(protocol)
Example #10
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    rng = check_random_state(0)
    x_in = rng.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))

        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
Example #13
def test_kd_tree_pickle():
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)  # KDTree.query returns (distances, indices)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_array_almost_equal(ind1, ind2)
        assert_array_almost_equal(dist1, dist2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
Example #14
def test_kd_tree_pickle():
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    kdt1 = KDTree(X, leaf_size=1)
    dist1, ind1 = kdt1.query(X)  # KDTree.query returns (distances, indices)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(kdt1, protocol=protocol)
        kdt2 = pickle.loads(s)
        dist2, ind2 = kdt2.query(X)
        assert_allclose(ind1, ind2)
        assert_allclose(dist1, dist2)

    for protocol in (0, 1, 2):
        yield check_pickle_protocol, protocol
Example #15
def test_kd_tree_kde(n_samples=100, n_features=3):
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    kdt = KDTree(X, leaf_size=10)

    for kernel in [
            'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',
            'cosine'
    ]:
        for h in [0.01, 0.1, 1]:
            dens_true = compute_kernel_slow(Y, X, kernel, h)

            def check_results(kernel, h, atol, rtol, breadth_first):
                dens = kdt.kernel_density(Y,
                                          h,
                                          atol=atol,
                                          rtol=rtol,
                                          kernel=kernel,
                                          breadth_first=breadth_first)
                assert_allclose(dens,
                                dens_true,
                                atol=atol,
                                rtol=max(rtol, 1e-7))

            for rtol in [0, 1E-5]:
                for atol in [1E-6, 1E-2]:
                    for breadth_first in (True, False):
                        yield (check_results, kernel, h, atol, rtol,
                               breadth_first)
Example #16
def test_kd_tree_query_radius(n_samples=100, n_features=10):
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        ind = kdt.query_radius([query_pt], r + eps)[0]
        i = np.where(rad <= r + eps)[0]

        ind.sort()
        i.sort()

        assert_allclose(i, ind)
Example #17
def test_kd_tree_query_radius(n_samples=100, n_features=10):
    rng = check_random_state(0)
    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt)**2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        ind = kdt.query_radius([query_pt], r + eps)[0]
        i = np.where(rad <= r + eps)[0]

        ind.sort()
        i.sort()

        assert_array_almost_equal(i, ind)
Example #18
def generate_pairs_raw(patches, constants):
    """Generate raw pairs without patch normalization."""
    # Convert the list of raw patches into numpy arrays
    patch_database = []
    patch_database.append(
        np.vstack([np.reshape(patch.raw_patch, [-1]) for patch in patches[0]]))
    # Find the 2 nearest neighbours for each patch: the nearest is the patch itself (a duplicate)
    nearest = []
    p1 = np.concatenate(patch_database[0:])
    kdt = KDTree(p1, leaf_size=30, metric='euclidean')
    nn = kdt.query(patch_database[0],
                   k=2,
                   return_distance=False,
                   sort_results=False)
    nearest.append(nn)

    return np.concatenate(nearest)
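Since each query patch is also a member of the database, its nearest neighbour is (normally) itself, which is why k=2 is queried above. A small sketch of dropping the self-match, assuming sorted results; sklearn requires return_distance=True whenever sort_results=True:

dist, nn_sorted = kdt.query(patch_database[0], k=2,
                            return_distance=True, sort_results=True)
true_nearest = nn_sorted[:, 1]  # column 0 is (normally) the self-match at distance 0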
Example #20
    def __call__(self, x, ma):
        h = F.tanh(self.l0(x))
        h = F.tanh(self.l1(h))
        h = F.tanh(self.l2(h))

        # kd_tree
        q_train = []  # for train [variable,variable]
        ind_list = []  # for train
        dist_list = []  # for train
        for j in range(len(ma.maq)):  # loop n_actions
            h_list = ma.mah[j]
            lp = len(h_list)
            leaf_size = lp + (lp // 2)  # integer leaf_size (lp / 2 is a float on Python 3)

            tree = KDTree(h_list, leaf_size=leaf_size)
            h_ = h.data

            if lp < 50:
                k = lp
            else:
                k = 50
            dist, ind = tree.query(h_, k=k)

            mahi = ma.mah[j][ind[0]]
            hi = chainer.Variable(cuda.to_cpu(mahi))
            tiled_h = chainer.Variable(np.tile(h.data, (len(ind[0]), 1)))
            wi = F.expand_dims(
                1 /
                (F.sqrt(F.sum((tiled_h - hi) *
                              (tiled_h - hi), axis=1) + 1e-3)), 1)
            w = F.sum(wi, axis=0)
            maqi = ma.maq[j][ind[0]]
            q = chainer.Variable(cuda.to_cpu(maqi))
            qq = F.expand_dims(F.sum(wi * q, axis=0) / w, 1)

            q_train.append(qq)
            ind_list.append(ind)
            dist_list.append(dist)

            self.q_list[0][j] = qq.data
        if self.use_gpu:
            qa = chainer.Variable(cuda.to_cpu(self.q_list))
        else:
            qa = self.q_list

        return qa, q_train, ind_list, dist_list, h.data
Example #21
def test_kd_tree_query_radius_distance(n_samples=100, n_features=10):
    np.random.seed(0)
    X = 2 * np.random.random(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        ind, dist = kdt.query_radius([query_pt], r + eps, return_distance=True)

        ind = ind[0]
        dist = dist[0]

        d = np.sqrt(((query_pt - X[ind]) ** 2).sum(1))

        assert_allclose(d, dist)
Example #22
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old scipy, does not accept explicit bandwidth.")

        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
Example #23
def test_kd_tree_query_radius_distance(n_samples=100, n_features=10):
    rng = check_random_state(0)
    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
    query_pt = np.zeros(n_features, dtype=float)

    eps = 1E-15  # roundoff error can cause test to fail
    kdt = KDTree(X, leaf_size=5)
    rad = np.sqrt(((X - query_pt) ** 2).sum(1))

    for r in np.linspace(rad[0], rad[-1], 100):
        ind, dist = kdt.query_radius([query_pt], r + eps, return_distance=True)

        ind = ind[0]
        dist = dist[0]

        d = np.sqrt(((query_pt - X[ind]) ** 2).sum(1))

        assert_array_almost_equal(d, dist)
Example #26
    def _fit(self, X):
        # use Euclidean metric if possible, or raise an error [IY]
        # note that in sompy.project_realdata() the algorithm is set by default
        # (e.g. to 'brute' or 'kd_tree')
        if self.metric_params is None:
            self.effective_metric_params_ = {}
        else:
            self.effective_metric_params_ = self.metric_params.copy()
        if self.metric not in ['euclidean', 'minkowski']:
            raise ValueError("metric must be 'euclidean' or 'minkowski'")
        self.effective_metric_ = self.metric
        # For minkowski distance, use more efficient methods where available
        if self.metric == 'minkowski':
            p = self.effective_metric_params_.pop('p', 2)
            if p == 2:
                self.effective_metric_ = 'euclidean'
            else:
                raise ValueError(
                    "cannot replace Minkowski with Euclidean metric")

        X = check_array(X, accept_sparse='csr')

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("n_samples must be greater than 0")

        if issparse(X) and self.effective_metric_ not in VALID_METRICS_SPARSE[
                'brute']:
            raise ValueError("metric '%s' not valid for sparse input" %
                             self.effective_metric_)

        self._fit_method = self.algorithm
        self._fit_X = X

        if self._fit_method == 'ball_tree':
            self._tree = BallTree(X,
                                  self.leaf_size,
                                  metric=self.effective_metric_,
                                  **self.effective_metric_params_)
        elif self._fit_method == 'kd_tree':
            self._tree = KDTree(X,
                                self.leaf_size,
                                metric=self.effective_metric_,
                                **self.effective_metric_params_)
        elif self._fit_method == 'brute':
            self._tree = None
        else:
            raise ValueError("algorithm = '%s' not recognized" %
                             self.algorithm)

        if self.n_neighbors is not None:
            if self.n_neighbors <= 0:
                raise ValueError("Expected n_neighbors > 0. Got %d" %
                                 self.n_neighbors)

        return self
Example #27
 def write(self, h, v):
     keys = np.array(self.memory_keys, dtype=np.float32)
     if len(self.memory_keys) > 0:
         tree = KDTree(keys, leaf_size=50)
         distance, index = tree.query(np.array([h], dtype=np.float32))
         if distance[0][0] == 0:
             index = index[0][0]
             self.memory_values[index] += self.lr * (v - self.memory_values[index])
             return
     if len(self.memory_values) < self.capacity:
         # reset the age of the slot the new entry will occupy
         self.ages[len(self.memory_values)] = 0
         self.memory_keys.append(h)
         self.memory_values.append(v)
     else:
         index = np.argmin(self.ages)
         self.memory_keys[index] = h
         self.memory_values[index] = v
         self.ages[index] = 0
Example #28
 def add(self, state, value, time):
     if len(self) < self.capacity:
         self.states.append(state)
         self.values.append(value)
         self.times.append(time)
     else:
         min_time_idx = int(np.argmin(self.times))
         if time > self.times[min_time_idx]:
             self.replace(state, value, time, min_time_idx)
     self._tree = KDTree(np.array(self.states))
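Only the write path is shown; a hypothetical companion lookup against the tree rebuilt in add() might look like the following (the method name, the k parameter, and the use of self.values are assumptions, not from the original):

 def lookup(self, state, k=5):
     # KDTree.query returns (distances, indices); query with a single-row array
     k = min(k, len(self.states))
     dist, ind = self._tree.query(np.array([state]), k=k)
     return [self.values[i] for i in ind[0]]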
Example #29
def test_gaussian_kde(n_samples=1000):
    """Compare gaussian KDE results to scipy.stats.gaussian_kde"""
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            # older versions of scipy don't accept explicit bandwidth
            raise SkipTest

        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_allclose(dens_kdt, dens_gkde, rtol=1E-3, atol=1E-3)
Example #30
 def lookup(self, h):
     if len(self.memory_values) == 0:
         return np.zeros((len(h), 1, len(h[0])), dtype=np.float32), np.zeros((len(h), 1), dtype=np.float32)
     keys = np.array(self.memory_keys, dtype=np.float32)
     values = np.array(self.memory_values, dtype=np.float32)
     size = keys.shape[0]
     if size < self.p:
         k = size
     else:
         k = self.p
     queried_keys = np.zeros((len(h), k, len(h[0])), dtype=np.float32)
     queried_values = np.zeros((len(h), k), dtype=np.float32)
     tree = KDTree(keys, leaf_size=50)  # build the tree once; keys do not change below
     for i, encoded_state in enumerate(h):
         distances, indices = tree.query(np.array([encoded_state], dtype=np.float32), k=k)
         queried_keys[i] = keys[indices[0]]
         queried_values[i] = values[indices[0]]
         self.ages += 1
         self.ages[indices] = 0
     return queried_keys, queried_values
Example #31
    def fit_predict(self, xs: np.ndarray, ys: np.ndarray = None):
        kd_tree = KDTree(xs, metric=self.metric, leaf_size=self.leaf_size)
        n_points = xs.shape[0]
        neighbours = kd_tree.query_radius(X=xs, r=self.eps)
        dsu = DisjointSetUnion(n_points)
        for i, neighs in enumerate(neighbours):
            if neighs.shape[0] < self.min_samples:
                continue
            for j in neighs:
                dsu.merge(i, j)

        if ys is None:
            ys = [0] * n_points
            current_cluster_id = 0
            for i in range(n_points):
                if i == dsu.find(i):
                    ys[i] = current_cluster_id
                    current_cluster_id += 1

        return [ys[dsu.find(i)] for i in range(n_points)]
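DisjointSetUnion is not included in the snippet; a minimal sketch of the union-find structure fit_predict appears to assume (path compression only; the details are guesses):

class DisjointSetUnion:
    def __init__(self, n: int):
        self.parent = list(range(n))

    def find(self, i: int) -> int:
        # walk to the root, compressing the path along the way
        while self.parent[i] != i:
            self.parent[i] = self.parent[self.parent[i]]
            i = self.parent[i]
        return i

    def merge(self, i: int, j: int) -> None:
        # attach i's root under j's root
        self.parent[self.find(i)] = self.find(j)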
Example #32
def test_kd_tree_kde(kernel, h):
    n_samples, n_features = (100, 3)
    rng = check_random_state(0)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))
    kdt = KDTree(X, leaf_size=10)

    dens_true = compute_kernel_slow(Y, X, kernel, h)

    for rtol in [0, 1E-5]:
        for atol in [1E-6, 1E-2]:
            for breadth_first in (True, False):
                check_results(kernel, h, atol, rtol, breadth_first, Y, kdt,
                              dens_true)
Example #33
def test_kd_tree_two_point(n_samples=100, n_features=3):
    np.random.seed(0)
    X = np.random.random((n_samples, n_features))
    Y = np.random.random((n_samples, n_features))
    r = np.linspace(0, 1, 10)
    kdt = KDTree(X, leaf_size=10)

    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    counts_true = [(D <= ri).sum() for ri in r]

    def check_two_point(r, dualtree):
        counts = kdt.two_point_correlation(Y, r=r, dualtree=dualtree)
        assert_allclose(counts, counts_true)

    for dualtree in (True, False):
        yield check_two_point, r, dualtree
Example #34
def test_kd_tree_kde(n_samples=100, n_features=3):
    rng = check_random_state(0)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))
    kdt = KDTree(X, leaf_size=10)

    for kernel in [
            'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear',
            'cosine'
    ]:
        for h in [0.01, 0.1, 1]:
            dens_true = compute_kernel_slow(Y, X, kernel, h)

            for rtol in [0, 1E-5]:
                for atol in [1E-6, 1E-2]:
                    for breadth_first in (True, False):
                        yield (check_results, kernel, h, atol, rtol,
                               breadth_first, Y, kdt, dens_true)
Example #35
    def add(self, state, value, time, update_type):
        if len(self) < self.capacity:
            self.states.append(state)
            self.values.append(value)
            self.times.append(time)

            self.old_vals.append([value])
        else:
            min_time_idx = int(np.argmin(self.times))
            if time > self.times[min_time_idx]:

                if update_type == 'time average':
                    max_var_idx = int(
                        np.argmax(np.var(np.asarray(self.old_vals), axis=1)))
                    self.replace(state, value, time, max_var_idx)
                else:
                    self.replace(state, value, time, min_time_idx)
        self._tree = KDTree(np.array(self.states))
Example #36
    def _fit(self, X):
        self._check_algorithm_metric()
        self._check_hubness_algorithm()
        self._check_algorithm_hubness_compatibility()
        if self.metric_params is None:
            self.effective_metric_params_ = {}
        else:
            self.effective_metric_params_ = self.metric_params.copy()

        effective_p = self.effective_metric_params_.get('p', self.p)
        if self.metric in ['wminkowski', 'minkowski']:
            self.effective_metric_params_['p'] = effective_p

        self.effective_metric_ = self.metric
        # For minkowski distance, use more efficient methods where available
        if self.metric == 'minkowski':
            p = self.effective_metric_params_.pop('p', 2)
            if p <= 0:
                raise ValueError(
                    f"p must be greater than one for minkowski metric, "
                    f"or in ]0, 1[ for fractional norms.")
            elif p == 1:
                self.effective_metric_ = 'manhattan'
            elif p == 2:
                self.effective_metric_ = 'euclidean'
            elif p == np.inf:
                self.effective_metric_ = 'chebyshev'
            else:
                self.effective_metric_params_['p'] = p

        if isinstance(X, NeighborsBase):
            self._fit_X = X._fit_X
            self._tree = X._tree
            self._fit_method = X._fit_method
            self._index = X._index
            self._hubness_reduction = X._hubness_reduction
            return self

        elif isinstance(X, BallTree):
            self._fit_X = X.data
            self._tree = X
            self._fit_method = 'ball_tree'
            return self

        elif isinstance(X, KDTree):
            self._fit_X = X.data
            self._tree = X
            self._fit_method = 'kd_tree'
            return self

        elif isinstance(X, ApproximateNearestNeighbor):
            self._tree = None
            if isinstance(X, PuffinnLSH):
                self._fit_X = X.X_train_
                self._fit_method = 'lsh'
            elif isinstance(X, FalconnLSH):
                self._fit_X = X.X_train_
                self._fit_method = 'falconn_lsh'
            elif isinstance(X, ONNG):
                self._fit_method = 'onng'
            elif isinstance(X, HNSW):
                self._fit_method = 'hnsw'
            elif isinstance(X, RandomProjectionTree):
                self._fit_method = 'rptree'
            self._index = X
            # TODO enable hubness reduction here
            ...
            return self

        X = check_array(X, accept_sparse='csr')

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError(
                f"n_samples must be greater than 0 (but was {n_samples}).")

        if issparse(X):
            if self.algorithm not in ('auto', 'brute'):
                warnings.warn("cannot use tree with sparse input: "
                              "using brute force")
            if self.effective_metric_ not in VALID_METRICS_SPARSE['brute'] \
                    and not callable(self.effective_metric_):
                raise ValueError(
                    f"Metric '{self.effective_metric_}' not valid for sparse input. "
                    f"Use sorted(sklearn.neighbors.VALID_METRICS_SPARSE['brute']) "
                    f"to get valid options. Metric can also be a callable function."
                )
            self._fit_X = X.copy()
            self._tree = None
            self._fit_method = 'brute'
            if self.hubness is not None:
                warnings.warn(
                    'cannot use hubness reduction with sparse data: '
                    'disabling hubness reduction.')
                self.hubness = None
            self._hubness_reduction_method = None
            self._hubness_reduction = NoHubnessReduction()
            return self

        self._fit_method = self.algorithm
        self._fit_X = X
        self._hubness_reduction_method = self.hubness

        if self._fit_method == 'auto':
            # A tree approach is better for small number of neighbors,
            # and KDTree is generally faster when available
            if ((self.n_neighbors is None
                 or self.n_neighbors < self._fit_X.shape[0] // 2)
                    and self.metric != 'precomputed'):
                if self.effective_metric_ in VALID_METRICS['kd_tree']:
                    self._fit_method = 'kd_tree'
                elif (callable(self.effective_metric_)
                      or self.effective_metric_ in VALID_METRICS['ball_tree']):
                    self._fit_method = 'ball_tree'
                else:
                    self._fit_method = 'brute'
            else:
                self._fit_method = 'brute'
            self._index = None

        if self._fit_method == 'ball_tree':
            self._tree = BallTree(X,
                                  self.leaf_size,
                                  metric=self.effective_metric_,
                                  **self.effective_metric_params_)
            self._index = None
        elif self._fit_method == 'kd_tree':
            self._tree = KDTree(X,
                                self.leaf_size,
                                metric=self.effective_metric_,
                                **self.effective_metric_params_)
            self._index = None
        elif self._fit_method == 'brute':
            self._tree = None
            self._index = None
        elif self._fit_method == 'lsh':
            self._index = PuffinnLSH(verbose=self.verbose,
                                     **self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'falconn_lsh':
            self._index = FalconnLSH(verbose=self.verbose,
                                     **self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'onng':
            self._index = ONNG(verbose=self.verbose, **self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'hnsw':
            self._index = HNSW(verbose=self.verbose, **self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'rptree':
            self._index = RandomProjectionTree(verbose=self.verbose,
                                               **self.algorithm_params)
            self._index.fit(X)
            self._tree = None  # because it's a tree, but not an sklearn tree...
        else:
            raise ValueError(f"algorithm = '{self.algorithm}' not recognized")

        if self._hubness_reduction_method is None:
            self._hubness_reduction = NoHubnessReduction()
        else:
            n_candidates = self.algorithm_params['n_candidates']
            if 'include_self' in self.kwargs and self.kwargs['include_self']:
                neigh_train = self.kcandidates(X,
                                               n_neighbors=n_candidates,
                                               return_distance=True)
            else:
                neigh_train = self.kcandidates(n_neighbors=n_candidates,
                                               return_distance=True)
            # Self distances are kept here; the `[:, 1:]` slice that would
            # remove them is commented out
            neigh_dist_train = neigh_train[0]  # [:, 1:]
            neigh_ind_train = neigh_train[1]  # [:, 1:]
            if self._hubness_reduction_method == 'ls':
                self._hubness_reduction = LocalScaling(verbose=self.verbose,
                                                       **self.hubness_params)
            elif self._hubness_reduction_method == 'mp':
                self._hubness_reduction = MutualProximity(
                    verbose=self.verbose, **self.hubness_params)
            elif self._hubness_reduction_method == 'dsl':
                self._hubness_reduction = DisSimLocal(verbose=self.verbose,
                                                      **self.hubness_params)
            elif self._hubness_reduction_method == 'snn':
                raise NotImplementedError('feature not yet implemented')
            elif self._hubness_reduction_method == 'simhubin':
                raise NotImplementedError('feature not yet implemented')
            else:
                raise ValueError(
                    f'Hubness reduction algorithm = "{self._hubness_reduction_method}" not recognized.'
                )
            self._hubness_reduction.fit(neigh_dist_train,
                                        neigh_ind_train,
                                        X=X,
                                        assume_sorted=False)

        if self.n_neighbors is not None:
            if self.n_neighbors <= 0:
                raise ValueError(
                    f"Expected n_neighbors > 0. Got {self.n_neighbors:d}")
            else:
                if not np.issubdtype(type(self.n_neighbors), np.integer):
                    raise TypeError(
                        f"n_neighbors does not take {type(self.n_neighbors)} value, "
                        f"enter integer value")

        return self
Example #37
def compute_average_scores(pdb_path, cat, it, bu):
    files = glob("%s*_%s_%s.pdb" % (pdb_path, it, bu))
    
    for pdb_filename in sorted(files):
        pdb_id = basename(pdb_filename)[:-4]
        
        pdb_patch_coord = ("%s%s_patch_coord.txt" % (pdb_path, pdb_id))
        pdb_patch_score = ("%s%s_patch_score.txt" % (pdb_path, pdb_id))
      
        with open(pdb_patch_coord) as coord, open(pdb_patch_score) as score:
            patch_coord = [[float(x) for x in a.split()] for a in coord.readlines()]
            patch_score = [float(x) - threshold[(cat, it, bu)] for x in score.readlines()]
 
        min_v = min(patch_score)
        max_v = max(patch_score)
         
        patch_score_scaled = [(lambda x: -(x / min_v) if x < 0 else (x / max_v))(x) for x in patch_score]
     
        X = np.array([a[0] for a in zip(patch_coord, patch_score_scaled) if a[1] >= 0])
        X_weights = np.array([x for x in patch_score_scaled if x >= 0])
        
        pdb_structure = p.get_structure(pdb_id, pdb_filename)
        atoms = np.array([atm.get_coord() for atm in pdb_structure.get_atoms() if not isHydrogen(atm) and not isHETATM(atm)])
        atoms_tree = KDTree(atoms)     
        
        residues_coord = {}
        for residue in pdb_structure.get_residues():
            for atm in residue:
                residues_coord[tuple(atm.get_coord())] = residue

        average_residues_scores = {residue: 0 for residue in pdb_structure.get_residues()}

        # since the isolation forest algorithm is random, we run it several times to assess the average performance of the method
        if outlier_fraction[(cat, it, bu)]:
            reps = n_iterations
        else:
            reps = 1

        for iteration in range(reps):
            print("Running iteration %d of %d" % (iteration + 1, reps))
            if outlier_fraction[(cat, it, bu)]:
                forest = IsolationForest(contamination=outlier_fraction[(cat, it, bu)], n_jobs=-1)
                forest.fit(X, sample_weight=X_weights)
         
                prediction_isolation_forest = forest.predict(patch_coord)
                patch_pred_no_outliers = [copysign(1, x) for x in prediction_isolation_forest]
            else:
                patch_pred_no_outliers = [copysign(1, x) for x in patch_score]
            # here we map the patch predictions on the underlying residues
            for i in range(len(patch_coord)):  # for each patch
                # if it was predicted as non-interface, continue to the next
                if patch_pred_no_outliers[i] < 0:
                    continue
                # multiple residues can be underneath a given patch, we do not want to consider the same residue more than once
                marked_residues = set() 
                # get all atoms within mapping_distance from the given patch center
                indexes = atoms_tree.query_radius([patch_coord[i]], r=mapping_distance, count_only=False, return_distance=True, sort_results=True)
                for ind in zip(indexes[0][0], indexes[1][0]):
                    # which residue does the current atom belong to?
                    current_res = residues_coord[tuple(atoms[ind[0]])] 
                    # if already considered continue to the next
                    if current_res in marked_residues:
                        continue
                    # increase the score of the current residue
                    average_residues_scores[current_res] += 1 / (1.0 + ind[1]) # patch_pred_no_outliers[i] / (1.0 + ind[1])
                    # mark as seen for the current patch
                    marked_residues.add(current_res)
             
        average_residues_scores.update((x, y / reps) for x, y in average_residues_scores.items())
        
        residues_with_scores = [(lambda x, y: (x[2], str(x[3][1]) + x[3][2], y))(residue.get_full_id(), score) for residue, score in average_residues_scores.items()]
        residues_with_scores.sort(key=lambda x: x[1])
        residues_with_scores.sort(key=lambda x: x[0])

        prediction_path = pdb_path + "our_prediction/"
        if not path.exists(prediction_path):
            makedirs(prediction_path)
        print(pdb_id)
        with open("%s%s_residue_scores.txt" % (prediction_path, pdb_id), "w") as output_residue_scores:
            for r in residues_with_scores:
                output_residue_scores.write("%s;%s;%f\n" % (r[0], r[1], r[2]))
Example #38
import cv2
import pickle
from sklearn.neighbors import KDTree
import numpy as np

from bagofvisualwords import BagOfVisualWords

from VLADlib.VLAD import *
from VLADlib.Descriptors import *

pathVD = "visualWords/visualWords.pickle"
with open(pathVD, 'rb') as f:
    vocab = pickle.load(f)

training = np.asarray([i.toarray()[0].tolist() for i in vocab])
tree = KDTree(training, leaf_size=2)

image = 'dataset/3.jpg'
im = cv2.imread(image)

# initial BoW
pathVD = 'visualDictionary/visualDictionary2ORB.pickle'
with open(pathVD, 'rb') as g:
    visualDictionary = pickle.load(g)

bovw = BagOfVisualWords(visualDictionary.cluster_centers_)

# compute descriptors
kp, descriptor = describeORB(im)

# represent as BoW
Example #39
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts() 

rows_lol = []
for i in range(len(gd.get_data_slice(headers[0], dicts))):
	rows_lol.append([])

for i in range(len(headers)):
	column = gd.get_data_slice_numbers(headers[i], dicts)
	for j in range(len(gd.get_data_slice(headers[0], dicts))):
		rows_lol[j].append(column[j])

X = np.array(rows_lol)
#nbrs = NearestNeighbors(n_neighbors=5, algorithm ='kd_tree', metric ='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
kdt.query(X, k=3, return_distance=False)
Example #40
    def _fit(self, X):
        self._check_algorithm_metric()
        self._check_hubness_algorithm()
        self._check_algorithm_hubness_compatibility()
        if self.metric_params is None:
            self.effective_metric_params_ = {}
        else:
            self.effective_metric_params_ = self.metric_params.copy()

        effective_p = self.effective_metric_params_.get('p', self.p)
        if self.metric in ['wminkowski', 'minkowski']:
            self.effective_metric_params_['p'] = effective_p

        self.effective_metric_ = self.metric
        # For minkowski distance, use more efficient methods where available
        if self.metric == 'minkowski':
            p = self.effective_metric_params_.pop('p', 2)
            if p <= 0:
                raise ValueError(f"p must be greater than one for minkowski metric, "
                                 f"or in ]0, 1[ for fractional norms.")
            elif p == 1:
                self.effective_metric_ = 'manhattan'
            elif p == 2:
                self.effective_metric_ = 'euclidean'
            elif p == np.inf:
                self.effective_metric_ = 'chebyshev'
            else:
                self.effective_metric_params_['p'] = p

        if isinstance(X, NeighborsBase):
            self._fit_X = X._fit_X
            self._tree = X._tree
            self._fit_method = X._fit_method
            self._index = X._index
            self._hubness_reduction = X._hubness_reduction
            return self

        elif isinstance(X, BallTree):
            self._fit_X = X.data
            self._tree = X
            self._fit_method = 'ball_tree'
            return self

        elif isinstance(X, KDTree):
            self._fit_X = X.data
            self._tree = X
            self._fit_method = 'kd_tree'
            return self

        elif isinstance(X, ApproximateNearestNeighbor):
            self._tree = None
            if isinstance(X, PuffinnLSH):
                self._fit_X = np.array([X.index_.get(i) for i in range(X.n_indexed_)]) * X.X_indexed_norm_
                self._fit_method = 'lsh'
            elif isinstance(X, FalconnLSH):
                self._fit_X = X.X_train_
                self._fit_method = 'falconn_lsh'
            elif isinstance(X, NNG):
                self._fit_X = None
                self._fit_method = 'nng'
            elif isinstance(X, HNSW):
                self._fit_X = None
                self._fit_method = 'hnsw'
            elif isinstance(X, RandomProjectionTree):
                self._fit_X = None
                self._fit_method = 'rptree'
            self._index = X
            # TODO enable hubness reduction here.
            # We do not store X_train in all cases atm.
            # self._hubness_reduction_method = self.hubness
            # self._set_hubness_reduction(self._fit_X)
            return self

        X = check_array(X, accept_sparse='csr')

        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError(f"n_samples must be greater than 0 (but was {n_samples}.")

        if issparse(X):
            if self.algorithm not in ('auto', 'brute'):
                warnings.warn("cannot use tree with sparse input: "
                              "using brute force")
            if self.effective_metric_ not in VALID_METRICS_SPARSE['brute'] \
                    and not callable(self.effective_metric_):
                raise ValueError(f"Metric '{self.effective_metric_}' not valid for sparse input. "
                                 f"Use sorted(sklearn.neighbors.VALID_METRICS_SPARSE['brute']) "
                                 f"to get valid options. Metric can also be a callable function.")
            self._fit_X = X.copy()
            self._tree = None
            self._fit_method = 'brute'
            if self.hubness is not None:
                warnings.warn(f'cannot use hubness reduction with sparse data: disabling hubness reduction.')
                self.hubness = None
            self._hubness_reduction_method = None
            self._hubness_reduction = NoHubnessReduction()
            return self

        self._fit_method = self.algorithm
        self._fit_X = X
        self._hubness_reduction_method = self.hubness

        if self._fit_method == 'auto':
            # A tree approach is better for small number of neighbors,
            # and KDTree is generally faster when available
            if ((self.n_neighbors is None or
                 self.n_neighbors < self._fit_X.shape[0] // 2) and
                    self.metric != 'precomputed'):
                if self.effective_metric_ in VALID_METRICS['kd_tree']:
                    self._fit_method = 'kd_tree'
                elif (callable(self.effective_metric_) or
                      self.effective_metric_ in VALID_METRICS['ball_tree']):
                    self._fit_method = 'ball_tree'
                else:
                    self._fit_method = 'brute'
            else:
                self._fit_method = 'brute'
            self._index = None

        if self._fit_method == 'ball_tree':
            self._tree = BallTree(X, self.leaf_size,
                                  metric=self.effective_metric_,
                                  **self.effective_metric_params_)
            self._index = None
        elif self._fit_method == 'kd_tree':
            self._tree = KDTree(X, self.leaf_size,
                                metric=self.effective_metric_,
                                **self.effective_metric_params_)
            self._index = None
        elif self._fit_method == 'brute':
            self._tree = None
            self._index = None
        elif self._fit_method == 'lsh':
            self._index = PuffinnLSH(**self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'falconn_lsh':
            self._index = FalconnLSH(**self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'nng':
            self._index = NNG(**self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'hnsw':
            self._index = HNSW(**self.algorithm_params)
            self._index.fit(X)
            self._tree = None
        elif self._fit_method == 'rptree':
            self._index = RandomProjectionTree(**self.algorithm_params)
            self._index.fit(X)
            self._tree = None  # because it's a tree, but not an sklearn tree...
        else:
            raise ValueError(f"algorithm = '{self.algorithm}' not recognized")

        # Fit hubness reduction method
        self._set_hubness_reduction(X)

        if self.n_neighbors is not None:
            if self.n_neighbors <= 0:
                raise ValueError(f"Expected n_neighbors > 0. Got {self.n_neighbors:d}")
            else:
                if not np.issubdtype(type(self.n_neighbors), np.integer):
                    raise TypeError(
                        f"n_neighbors does not take {type(self.n_neighbors)} value, "
                        f"enter integer value"
                        )

        return self