def test_k_and_radius_neighbors_duplicates():
    """Check k- and radius-neighbor queries when queries duplicate fitted points."""
    train = [[0], [1]]
    queries = [[0], [1]]

    for algorithm in ALGORITHMS:
        model = neighbors.NearestNeighbors(n_neighbors=1, algorithm=algorithm)
        model.fit(train)

        # Explicitly passed query points that coincide with training points
        # receive no special treatment: each one matches itself at distance 0.
        knn_graph = model.kneighbors_graph(queries, mode='distance')
        assert_array_equal(knn_graph.A, np.array([[0., 0.], [0., 0.]]))
        assert_array_equal(knn_graph.data, [0., 0.])
        assert_array_equal(knn_graph.indices, [0, 1])

        distances, indices = model.radius_neighbors(queries, radius=1.5)
        check_object_arrays(distances, [[0, 1], [1, 0]])
        check_object_arrays(indices, [[0, 1], [0, 1]])

        connectivity = model.radius_neighbors_graph(queries, radius=1.5)
        assert_array_equal(connectivity.A, np.ones((2, 2)))

        distance_graph = model.radius_neighbors_graph(queries,
                                                      radius=1.5,
                                                      mode='distance')
        assert_array_equal(distance_graph.A, [[0, 1], [1, 0]])
        assert_array_equal(distance_graph.indices, [0, 1, 0, 1])
        assert_array_equal(distance_graph.data, [0, 1, 1, 0])

        # With more duplicates than requested neighbors, the first duplicates
        # are masked when querying the training set itself (no X argument).
        identical_points = np.ones((3, 1))
        model = neighbors.NearestNeighbors(n_neighbors=1)
        model.fit(identical_points)
        distances, indices = model.kneighbors()
        assert_array_equal(distances, np.zeros((3, 1)))
        assert_array_equal(indices, [[1], [0], [1]])

        # Zero distances must be stored explicitly in the sparse graph.
        knn_graph = model.kneighbors_graph(mode='distance')
        assert_array_equal(knn_graph.A, np.zeros((3, 3)))
        assert_array_equal(knn_graph.data, np.zeros(3))
        assert_array_equal(knn_graph.indices, [1., 0., 1.])
        expected_connectivity = np.array(
            [[0., 1., 0.], [1., 0., 0.], [0., 1., 0.]])
        assert_array_equal(model.kneighbors_graph().A, expected_connectivity)
Пример #2
0
def test_callable_metric():
    """Check that 'auto' and 'brute' agree when given the same callable metric."""
    def custom_metric(x1, x2):
        return np.sqrt(np.sum(x1**2 + x2**2))

    X = np.random.RandomState(42).rand(20, 2)

    # n_neighbors is passed by keyword: the estimator's constructor arguments
    # are keyword-only in recent scikit-learn releases, and the keyword form
    # is accepted by older releases as well.
    nbrs1 = neighbors.NearestNeighbors(n_neighbors=3,
                                       algorithm='auto',
                                       metric=custom_metric)
    nbrs2 = neighbors.NearestNeighbors(n_neighbors=3,
                                       algorithm='brute',
                                       metric=custom_metric)

    nbrs1.fit(X)
    nbrs2.fit(X)

    dist1, _ = nbrs1.kneighbors(X)
    dist2, _ = nbrs2.kneighbors(X)

    assert_array_almost_equal(dist1, dist2)
Пример #3
0
 def __init__(self, dataset, soft_encoding=True):
     """Store the dataset and index the bin centres for neighbour lookup.

     The bin centres are read from the module-level name ``bins_centers``.
     """
     self.dataset = dataset
     self.soft_encoding = soft_encoding
     self.device = torch.device('cuda:0')
     self.L_normalize = 100

     # Kernels are scaled by their largest absolute value before indexing.
     self.kernels = bins_centers
     self.num_bins = len(self.kernels)
     self.kernel_normalize = np.max(np.abs(self.kernels))
     scaled_kernels = self.kernels / self.kernel_normalize
     self.neighborhood = knn.NearestNeighbors(n_neighbors=5).fit(
         scaled_kernels)
Пример #4
0
def nearest_neighbors(src, dst):
    """Find, for every point in `src`, its single nearest point in `dst`.

    Returns the ``(distances, indices)`` pair produced by ``kneighbors``.
    """
    # Index the dst points, then look up each src point in that index.
    index = neighbors.NearestNeighbors(n_neighbors=1)
    index.fit(dst)
    return index.kneighbors(src)
Пример #5
0
def apply_mask_and_get_affinity(seeds,
                                niimg,
                                radius,
                                allow_overlap,
                                n_jobs=1,
                                mask_img=None):
    """Mask `niimg` and build the seed-to-voxel sphere membership matrix.

    Parameters:
        seeds: iterable of seed coordinates; materialised into a list.
        niimg: image providing ``affine`` and ``shape``.
        radius: sphere radius handed to ``NearestNeighbors``.
        allow_overlap: when false, a voxel belonging to two or more spheres
            raises ``ValueError``.
        n_jobs: number of joblib workers and number of chunks the seeds are
            split into for the nearest-voxel search.
        mask_img: mask image, resampled onto the grid of `niimg`.
            NOTE(review): the default is None but the value is passed straight
            to ``check_niimg_3d`` -- presumably a mask is required; confirm.

    Returns:
        ``(X, A)``: the masked signals returned by
        ``masking._apply_mask_fmri`` and a LIL sparse matrix with one row per
        seed whose non-zero columns are the in-mask voxels of that seed.

    Raises:
        ValueError: if `allow_overlap` is false and spheres overlap.
    """
    seeds = list(seeds)
    affine = niimg.affine

    # Resample the mask onto the image grid and list in-mask voxel indices.
    mask_img = check_niimg_3d(mask_img)
    mask_img = image.resample_img(mask_img,
                                  target_affine=affine,
                                  target_shape=niimg.shape[:3],
                                  interpolation='nearest')
    mask, _ = masking._load_mask_img(mask_img)
    mask_coords = list(zip(*np.where(mask != 0)))

    X = masking._apply_mask_fmri(niimg, mask_img)

    # For each seed, get coordinates of nearest voxel (one chunk per worker).
    nearests = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(seed_nearest)(seed_chunk, affine, mask_coords)
        for seed_chunk in np.array_split(seeds, n_jobs))
    nearests = [i for j in nearests for i in j]

    # Convert voxel indices to coordinates through the affine, one row per
    # voxel.
    mask_coords = np.asarray(list(zip(*mask_coords)))
    mask_coords = coord_transform(mask_coords[0], mask_coords[1],
                                  mask_coords[2], affine)
    mask_coords = np.asarray(mask_coords).T

    clf = neighbors.NearestNeighbors(radius=radius)
    A = clf.fit(mask_coords).radius_neighbors_graph(seeds)
    A = A.tolil()
    for i, nearest in enumerate(nearests):
        if nearest is None:
            continue
        A[i, nearest] = True
    # Include the voxel containing the seed itself if not masked
    mask_coords = mask_coords.astype(int).tolist()
    for i, seed in enumerate(seeds):
        try:
            A[i, mask_coords.index(seed)] = True
        except ValueError:
            # seed is not in the mask
            pass

    if not allow_overlap and np.any(A.sum(axis=0) >= 2):
        raise ValueError('Overlap detected between spheres')

    return X, A
Пример #6
0
    def get_transition_matrix(self, k=10, ann=False):
        """
        Build a transition matrix from a k-nearest-neighbour graph of
        ``self.data`` and compute its eigen-decomposition.

        Stores ``self.sigmas``, ``self.M``, ``self.mevals``, ``self.mevecs``
        and ``self.indices`` as side effects.

        :param k: number of each node's neighbors.
        :param ann: when False, use exact euclidean ``NearestNeighbors``;
            otherwise use ``LSHForest`` fitted with ``3 * k`` neighbours and
            queried for ``k``.
        :return: the dense transition matrix ``M``.
        """

        # kNN
        if ann == False:
            nbrs = neighbors.NearestNeighbors(n_neighbors=k,
                                              metric='euclidean',
                                              n_jobs=-1).fit(self.data)
            distances, indices = nbrs.kneighbors(self.data)
        else:  # needs improvement
            lshf = neighbors.LSHForest(n_neighbors=3 * k).fit(self.data)
            distances, indices = lshf.kneighbors(self.data, n_neighbors=k)

        # NOTE(review): N is read from self.shape, not self.data.shape --
        # presumably both describe the same number of rows; confirm.
        N = self.shape[0]
        sqdistances = np.square(distances)
        # Per-point bandwidth: distance to the last returned neighbour.
        sigmas = distances[:, -1]
        self.sigmas = sigmas

        # kernel matrix: exp(-d_ij^2 / (2 * sigma_i * sigma_j)) for each of
        # the k neighbours j of point i.
        sigs_mul = np.multiply.outer(sigmas, sigmas)
        kernel_matrix = np.zeros((N, k))
        for i in range(N):
            kernel_matrix[i, :] = np.exp(
                -np.divide(sqdistances[i, :], 2 *
                           (sigs_mul[i, indices[i, :]])))

        # Scatter the (N, k) weights into a dense (N, N) matrix via CSR,
        # with exactly k entries per row.
        weights = kernel_matrix
        indptr = range(0, (N + 1) * k, k)
        weight_matrix = sparse.csr_matrix(
            (weights.flatten(), indices.flatten(), indptr),
            shape=(N, N)).toarray()

        # symmetric: when j is a neighbour of i but i is not a neighbour of
        # j, copy the weight to the mirrored entry.
        for i, col in enumerate(indices):
            for j in col:
                if i not in set(indices[j]):
                    weight_matrix[j, i] = weight_matrix[i, j]

        # Scale by the inverse square root of the column sums on both sides,
        # then divide every row by its row sum.
        weight_sum = np.power(weight_matrix.sum(axis=0)[:, None],
                              -1 / 2).flatten()
        weight_sum = np.diag(weight_sum)
        weight_matrix = weight_sum @ weight_matrix @ weight_sum
        M = np.divide(weight_matrix, weight_matrix.sum(axis=1)[:, None])

        # NOTE(review): after the row normalisation M is in general not
        # symmetric, while eigh is meant for symmetric/Hermitian input --
        # confirm this is the intended decomposition.
        mevals, mevecs = sp.linalg.eigh(M)

        self.M = M
        self.mevals = mevals
        self.mevecs = mevecs
        self.indices = indices

        return M
Пример #7
0
def test_valid_brute_metric_for_auto_algorithm():
    """Smoke-test algorithm='auto' with the metrics listed as valid for brute force."""
    X = rng.rand(12, 12)
    Xcsr = csr_matrix(X)

    def fit_and_query(data, **metric_kwargs):
        # Fit on `data` and query the same `data`; only absence of errors
        # is checked.
        model = neighbors.NearestNeighbors(n_neighbors=3, algorithm='auto',
                                           **metric_kwargs).fit(data)
        model.kneighbors(data)

    # Make sure at least one metric is brute-only, otherwise the test
    # would not exercise anything interesting.
    assert_in("cosine", VALID_METRICS['brute'])
    assert_false("cosine" in VALID_METRICS['ball_tree'])

    # Metrics that need no additional parameter.
    require_params = ['mahalanobis', 'wminkowski', 'seuclidean']
    for metric in VALID_METRICS['brute']:
        if metric == 'precomputed':
            X_precomputed = rng.random_sample((10, 4))
            Y_precomputed = rng.random_sample((3, 4))
            DXX = metrics.pairwise_distances(X_precomputed, metric='euclidean')
            DYX = metrics.pairwise_distances(Y_precomputed, X_precomputed,
                                             metric='euclidean')
            precomputed_model = neighbors.NearestNeighbors(n_neighbors=3)
            precomputed_model.fit(DXX)
            precomputed_model.kneighbors(DYX)
        elif metric not in require_params:
            fit_and_query(X, metric=metric)

    for metric in VALID_METRICS_SPARSE['brute']:
        if metric == 'precomputed' or metric in require_params:
            continue
        fit_and_query(Xcsr, metric=metric)

    # Metrics that need a parameter.
    VI = np.dot(X, X.T)
    parametrized_metrics = [('seuclidean', dict(V=rng.rand(12))),
                            ('wminkowski', dict(w=rng.rand(12))),
                            ('mahalanobis', dict(VI=VI))]
    for metric, params in parametrized_metrics:
        fit_and_query(X, metric=metric, metric_params=params)
def test_non_euclidean_kneighbors():
    """Check that the graph helper functions agree with the estimator methods
    for non-euclidean metrics, and that a metric mismatch is rejected."""
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius. np.sort returns a sorted copy, so the result
    # has to be kept; otherwise the radius is picked from unsorted distances.
    dist_array = np.sort(pairwise_distances(X).flatten())
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(X,
                                                3,
                                                metric=metric,
                                                mode='connectivity',
                                                include_self=True).toarray()
        # n_neighbors by keyword: the estimator constructor is keyword-only
        # in recent scikit-learn releases.
        nbrs1 = neighbors.NearestNeighbors(n_neighbors=3,
                                           metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radius_neighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric, mode='connectivity',
            include_self=True).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A)

    # Raise error when wrong parameters are supplied: a fitted manhattan
    # estimator combined with an explicit euclidean metric.
    X_nbrs = neighbors.NearestNeighbors(n_neighbors=3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError,
                  neighbors.kneighbors_graph,
                  X_nbrs,
                  3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError,
                  neighbors.radius_neighbors_graph,
                  X_nbrs,
                  radius,
                  metric='euclidean')
Пример #9
0
def median_kneighbour_distance(X, k=5):
    """Calculate the median distance between a set of random datapoints and
    their kth nearest neighbours. This is a heuristic for setting the
    kernel length scale.

    At most 2000 randomly chosen rows of `X` are used. The same rows are
    both fitted and queried.
    NOTE(review): because the queries are the fitted points themselves, each
    point presumably appears among its own k neighbours at distance 0 --
    confirm that this is the intended definition of "kth neighbour".
    """
    n_all = X.shape[0]
    n_subset = min(n_all, 2000)
    sample_idx = np.random.permutation(n_all)[:n_subset]
    sample = X[sample_idx, :]

    # n_neighbors by keyword: the estimator constructor is keyword-only in
    # recent scikit-learn releases.
    nn = neighbors.NearestNeighbors(n_neighbors=k)
    nn.fit(sample)
    distances, _ = nn.kneighbors(sample)
    # Last column holds the distance to the farthest of the k neighbours.
    return np.median(distances[:, -1])
    def __init__(self,NN,sigma,km_filepath='',cc=-1):
        """Set up the centre array and a ball-tree neighbour index over it.

        When ``check_value(cc, -1)`` is true the centres are loaded from
        `km_filepath` with ``np.load``; otherwise `cc` is used as given.
        """
        self.cc = np.load(km_filepath) if check_value(cc,-1) else cc
        self.K = self.cc.shape[0]
        self.NN = int(NN)
        self.sigma = sigma
        self.alreadyUsed = False

        finder = nn.NearestNeighbors(n_neighbors=NN, algorithm='ball_tree')
        self.nbrs = finder.fit(self.cc)
Пример #11
0
 def finalize(self):
   """Build the neighbour index from the observations, once.

   Does nothing when ``self.nn`` is already set. Raises ``Exception`` when
   there are no observations.
   """
   if self.nn is not None:
     return
   if not self.observations:
     raise Exception('no observations')

   if self.observations[0][0]:
     xs = [features for (features, _) in self.observations]
   else:
     # 0-length vectors are not allowed, so use a constant placeholder
     # feature for every observation.
     xs = [[0] for _ in self.observations]

   self.nn = neighbors.NearestNeighbors()
   self.nn.fit(xs)
Пример #12
0
    def __init__(self, split):
        """List the JPEG files of a split and index the quantized ab values.

        Files are read from ``data/<split>``; the quantized values come from
        ``data/pts_in_hull.npy``. The neighbour count is taken from the
        module-level name ``nb_neighbors``.
        """
        self.split = split
        self.folder = 'data/{}'.format(split)

        # Keep only file names ending in '.jpeg' (case-insensitive).
        self.files = [name for name in os.listdir(self.folder)
                      if name.lower().endswith('.jpeg')]

        # Load the quantized ab values and fit a ball-tree index on them.
        quantized_ab = np.load("data/pts_in_hull.npy")
        self.nb_q = quantized_ab.shape[0]
        finder = nn.NearestNeighbors(n_neighbors=nb_neighbors,
                                     algorithm='ball_tree')
        self.nn_finder = finder.fit(quantized_ab)
 def buildTree(self):
     """Fit a brute-force neighbour index on ``self.values``.

     The values are reshaped into rows of 6 entries when ``self.se2`` is
     truthy and 7 entries otherwise.
     """
     row_width = 6 if self.se2 else 7
     points = np.reshape(np.array(self.values), [-1, row_width])

     self.nn = sk.NearestNeighbors(n_jobs=-1,
                                   algorithm='brute',
                                   leaf_size=100,
                                   metric=self.metric)
     self.nn.fit(points)
Пример #14
0
def knn_diversity_stats(training_set, generated_imgs, k=3):
    """
    Return the average distance from the generated images to their `k`
    nearest neighbours in the training set.

    :param array training_set: the training set of images in which the nearest neighbours are searched
    :param array generated_imgs: the images whose nearest neighbours we wish to find
    :param int k: number of neighbours per generated image
    """
    dummy_targets = np.zeros(shape=(len(training_set), ))
    finder = neighbors.NearestNeighbors(n_neighbors=k)
    finder.fit(training_set, y=dummy_targets)

    neighbor_dists, _ = finder.kneighbors(generated_imgs)
    # Averaged over every (image, neighbour) pair.
    return np.average(neighbor_dists)
Пример #15
0
    def __k_neighbor(k_n, algm, feature):
        """
        Fit a scikit-learn NearestNeighbors model on `feature` and query it
        with the same data.

        :param k_n: int, number of neighbors to retrieve per sample
        :param algm: str, name of the neighbor-search algorithm
        :param feature: np.array, np.array object with column features
        :return: the result of ``kneighbors(feature)`` on the fitted model
        """
        finder = neighbors.NearestNeighbors(n_neighbors=k_n, algorithm=algm)
        return finder.fit(feature).kneighbors(feature)
Пример #16
0
def quantize(inputs, to_points, to_points_one_hot_encoded):
    """Soft-assign every input to its 10 nearest `to_points`.

    Each input gets Gaussian weights (sigma = 5.0) over its 10 nearest
    points, normalised to sum to one. Returns an array of shape
    ``(inputs.shape[0], to_points.shape[0])`` that is zero everywhere else.
    `to_points_one_hot_encoded` is accepted but not used.
    """
    sigma = 5.0

    finder = nn.NearestNeighbors(n_neighbors=10, algorithm='auto')
    finder.fit(to_points)
    dists, indices = finder.kneighbors(inputs)

    weights = np.exp(-dists**2 / (2 * sigma**2))
    weights = weights / np.sum(weights, axis=1)[:, np.newaxis]

    n_inputs = inputs.shape[0]
    end_points = np.zeros((n_inputs, to_points.shape[0]))
    # Column vector of row ids so it broadcasts against `indices`.
    row_ids = np.arange(0, n_inputs, dtype='int')[:, np.newaxis]
    end_points[row_ids, indices] = weights
    return end_points
Пример #17
0
def knnThing(matrix, labels):
    """Interactively guess an animal from five user-chosen look-alikes.

    The user picks five distinct indices in the range 0..49. The chosen rows
    are removed from `matrix` and `labels`, a 5-nearest-neighbour index is
    fitted on the remaining rows, and the remaining animal that most often
    appears among the neighbours of the chosen rows is printed as the
    prediction. Nothing is returned.
    """
    # Use Python 2's raw_input when it exists, otherwise Python 3's input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    animals = []

    print("\033[1mYou are going to pick 5 animals that are similar to your chosen animal.  Pick different animals each time and do not pick your chosen animal itself.\033[0m\n")

    while len(animals) < 5:
        inp = -1
        while inp < 0 or inp >= 50 or inp in animals:
            answer = read_line(
                "\nGive the index of one of the following animals:\n" +
                printAnimalOptions(labels, animals))

            try:
                inp = int(answer)
            except ValueError:
                # Only a non-numeric answer is handled here, so Ctrl-C and
                # unrelated errors are no longer swallowed.
                print("Please enter only an integer.")
                continue

            if inp < 0 or inp >= 50:
                print("Please give a value between 0 and 49.")
            elif inp in animals:
                print(labels[inp] + " has already been selected!")
        print("Ok, " + labels[inp] + " saved.")
        animals += [inp]

    animals.sort(reverse=True)

    # Keep the chosen rows (each as a one-sample 2-D query) before deleting
    # them from the data that the index is fitted on.
    animalRows = [[matrix[animal]] for animal in animals]

    matrix = np.delete(matrix, animals, axis=0)
    labels = np.delete(labels, animals, axis=0)

    friendos = neighbors.NearestNeighbors(n_neighbors=5)
    friendos.fit(matrix)

    # Count how often each remaining animal is a neighbour of a chosen one.
    counts = dict()
    for animalRow in animalRows:
        fiveClosest = friendos.kneighbors(animalRow, return_distance=False)[0]
        for closeAnimal in fiveClosest:
            counts[closeAnimal] = counts.get(closeAnimal, 0) + 1

    maxInd = max(counts, key=counts.get)

    print("We predict your animal was " + labels[maxInd] + ".")
Пример #18
0
def split_with_wasserstein(texts, test_set_size,
                           no_of_trials, min_df,
                           leaf_size):
  """Finds test sets by maximizing Wasserstein distances among the given texts.

  This is separating the given texts into training/dev and test sets based on an
  approximate Wasserstein method. First all texts are indexed in a nearest
  neighbors structure. Then a new test centroid is sampled randomly, from which
  the nearest neighbors in Wasserstein space are extracted. Those constitute
  the new test set.
  Similarity is computed based on document-term counts.

  Args:
    texts: Texts to split into training/dev and test sets.
    test_set_size: Number of elements the new test set should contain.
    no_of_trials: Number of test sets requested.
    min_df: Mainly for speed-up and memory efficiency. All tokens must occur at
      least this many times to be considered in the Wasserstein computation.
    leaf_size: Leaf size parameter of the nearest neighbor search. Set high
      values for slower, but less memory-heavy computation.

  Returns:
    Returns a List of test set indices, one for each trial. The indices
    correspond to the items in `texts` that should be part of the test set.
  """
  vectorizer = feature_extraction.text.CountVectorizer(
      dtype=np.int8, min_df=min_df)
  logging.info('Creating count vectors.')
  # Densify to a plain ndarray: todense() would give an np.matrix, which
  # recent scikit-learn estimators refuse as input.
  text_counts = vectorizer.fit_transform(texts).toarray()
  logging.info('Count vector shape %s.', text_counts.shape)
  logging.info('Creating tree structure.')
  nn_tree = neighbors.NearestNeighbors(
      n_neighbors=test_set_size,
      algorithm='ball_tree',
      leaf_size=leaf_size,
      metric=stats.wasserstein_distance)
  nn_tree.fit(text_counts)
  logging.info('Sampling test sets.')
  test_set_indices = []

  # Exclusive upper bound for sampled counts. Converted to a Python int so
  # that adding 1 cannot overflow the int8 count dtype.
  count_upper_bound = int(text_counts.max()) + 1

  for trial in range(no_of_trials):
    logging.info('Trial set: %d.', trial)
    # Sample random test centroid.
    sampled_point = np.random.randint(
        count_upper_bound, size=(1, text_counts.shape[1]))
    nearest_neighbors = nn_tree.kneighbors(sampled_point, return_distance=False)
    # We queried for only one datapoint.
    nearest_neighbors = nearest_neighbors[0]
    logging.info(nearest_neighbors[:10])
    test_set_indices.append(nearest_neighbors)

  return test_set_indices
Пример #19
0
    def dist(self, X, Y=None):
        """Return the dense k-nearest-neighbour distance graph of `X`.

        Neighbours are searched in `Y` when it is given and is a different
        object from `X`; otherwise they are searched within `X` itself.
        """
        if Y is not None and Y is not X:
            index = neighbors.NearestNeighbors(metric=self.metric)
            index.fit(Y)
            graph = index.kneighbors_graph(X, self.k, mode='distance')
        else:
            graph = neighbors.kneighbors_graph(X,
                                               self.k,
                                               mode='distance',
                                               metric=self.metric)

        # Densified because sparse matrices are not handled downstream yet.
        return graph.toarray()
Пример #20
0
    def fit_history(self, udf_metric, n_cases):
        """
        Fit a neighbour index on ``self.normalized_history`` and store in
        ``self.indices`` the neighbours of the first row of
        ``self.normalized_current``.

        Args:
            udf_metric: user-defined metric object
            n_cases:    int, number of neighbours to retrieve
        """
        self.neigh = neighbors.NearestNeighbors(n_neighbors=n_cases,
                                                metric=udf_metric)
        self.neigh.fit(self.normalized_history)

        neighbor_ids = self.neigh.kneighbors(self.normalized_current,
                                             return_distance=False)
        self.indices = neighbor_ids[0]
Пример #21
0
def laplacian_matrix(data, k):
    """
    Build a normalised graph Laplacian from a cosine k-nearest-neighbour graph.

    The similarity between a point and each of its neighbours is
    ``1 - cosine distance``, written symmetrically; the diagonal is set to 1.
    Every entry is then divided by the square root of the product of its row
    sums, and the result is subtracted from the identity.

    :param data: containing data points,
    :param k: the number of neighbors considered (this distance metric is cosine,
    and the weights are measured by cosine)
    :return: dense Laplacian of shape (len(data), len(data))
    """
    finder = neighbors.NearestNeighbors(n_neighbors=k,
                                        algorithm='brute',
                                        metric='cosine')
    finder.fit(data)
    dist, neighbor_ids = finder.kneighbors(return_distance=True)

    n = len(data)
    sim = np.zeros((n, n))
    for row, (row_dists, row_neighbors) in enumerate(zip(dist, neighbor_ids)):
        for dist_value, col in zip(row_dists, row_neighbors):
            # Write both (row, col) and (col, row) to keep sim symmetric.
            sim[row][col] = sim[col][row] = 1.0 - dist_value
    np.fill_diagonal(sim, 1.0)

    # Row sums are taken before any row is rescaled.
    S = [np.sum(row) for row in sim]
    for i in range(n):
        sim[i] = [sim[i][j] / (S[i] * S[j])**0.5 for j in range(n)]

    return np.identity(n) - sim
Пример #22
0
def predict_NearestNeighbors(train_data,
                             train_labels,
                             test_data,
                             nb_neighbors=5):
    """Predict labels by choosing the label with the smallest mean neighbour distance.

    For every test sample the `nb_neighbors` nearest training samples are
    grouped by label; the label whose neighbours have the smallest mean
    distance wins (the first such label in neighbour order on ties). With a
    single neighbour this reduces to the label of that neighbour.

    Parameters:
        train_data: training samples used to build the kd-tree index.
        train_labels: labels indexable by training-sample position.
        test_data: samples to label.
        nb_neighbors: number of neighbours consulted per test sample.

    Returns:
        A float array with one predicted label per test sample.
    """
    print("Starting to compute Nearest Neighbors")
    computeStart = time.time()

    # Initialization
    neighbors_kdtree = neighbors.NearestNeighbors(n_neighbors=nb_neighbors,
                                                  algorithm='kd_tree')

    # Training
    print("Nearest Neighbors : starting training")
    neighbors_kdtree.fit(train_data, train_labels)

    # Predictions on test_data
    print("Nearest Neighbors : starting predictions")
    distances, indices = neighbors_kdtree.kneighbors(test_data)
    predicted_labels = np.ndarray(shape=(len(test_data)))
    for i in range(len(test_data)):
        # Group the neighbour distances by the label of the neighbour. This
        # also covers nb_neighbors == 1, where the previous special case
        # stored the neighbour's index instead of its label.
        distances_by_label = {}
        for neighbor_index, distance in zip(indices[i], distances[i]):
            label = train_labels[neighbor_index]
            distances_by_label.setdefault(label, []).append(distance)

        # min() returns the first minimal key, i.e. ties go to the label
        # that was encountered first.
        predicted_labels[i] = min(
            distances_by_label,
            key=lambda label: np.mean(distances_by_label[label]))

    computeDuration = time.time() - computeStart
    unit = " s"
    if computeDuration > 60:
        computeDuration = computeDuration / 60
        unit = " min"
    print("Nearest Neighbors finished, computing duration = " +
          str(computeDuration) + unit)

    return predicted_labels
Пример #23
0
 def analyze(self, data):
     """Find the 7 nearest rows for each row of ``data["value"]``.

     Returns ``(df_melt, data)``: the neighbour indices melted into long
     form with the first neighbour column ("i") as identifier, and the
     input `data` unchanged.
     """
     values = data["value"]
     geofips = data["geofips"].astype(object)

     # The classifier is fitted here, but its result is not used below.
     classifier = neighbors.KNeighborsClassifier()
     classifier.fit(values.to_frame(), geofips.to_frame().values.ravel())

     finder = neighbors.NearestNeighbors(n_neighbors=7, algorithm="brute")
     finder.fit(values.to_frame())
     _, neighbor_ids = finder.kneighbors(values.to_frame())

     column_names = ["i", "1", "2", "3", "4", "5", "6"]
     neighbor_table = pd.DataFrame(neighbor_ids, columns=column_names)
     df_melt = pd.melt(neighbor_table, id_vars=["i"])
     return df_melt, data
Пример #24
0
def test_radius_neighbors_boundary_handling():
    """Test whether points lying on boundary are handled consistently"""

    X = np.array([[1.5], [3.0], [3.01]])
    radius = 3.0

    for algorithm in ALGORITHMS:
        nbrs = neighbors.NearestNeighbors(radius=radius,
                                          algorithm=algorithm).fit(X)
        # The query is a single sample given as a 2-D array: 1-D query input
        # is rejected by current scikit-learn input validation.
        results = nbrs.radius_neighbors([[0.0]], return_distance=False)
        assert_equal(results.shape, (1, ))
        assert_equal(results.dtype, object)
        # The point at exactly the radius (3.0) is included; 3.01 is not.
        assert_array_equal(results[0], [0, 1])
Пример #25
0
    def build(self, data, k):
        """Fit a ball-tree index on `data` and return each point's `k` neighbours.

        Returns ``(indices, distances)`` -- note the order -- for the
        training points themselves.
        """
        self.check_metric(self.metric)

        index_options = dict(
            algorithm="ball_tree",
            metric=self.metric,
            metric_params=self.metric_params,
            n_jobs=self.n_jobs,
        )
        self.index = neighbors.NearestNeighbors(**index_options)
        self.index.fit(data)

        # No query array is passed, so the neighbours of the training set
        # itself are returned.
        neighbor_distances, neighbor_indices = self.index.kneighbors(
            n_neighbors=k)
        return neighbor_indices, neighbor_distances
Пример #26
0
    def test_knn_distance(self):
        """The "knn_distance_4" projection equals the summed 4-NN distances."""
        mapper = KeplerMapper()
        data = np.random.rand(100, 5)
        lens = mapper.project(data, projection="knn_distance_4", scaler=None)

        # Reference value computed directly with scikit-learn.
        finder = neighbors.NearestNeighbors(n_neighbors=4)
        finder.fit(data)
        distances, _ = finder.kneighbors(data,
                                         n_neighbors=4,
                                         return_distance=True)
        expected = np.sum(distances, axis=1).reshape((-1, 1))

        assert lens.shape == (100, 1)
        np.testing.assert_array_equal(lens, expected)
Пример #27
0
    def get_transition_matrix2(self, k=10):
        """
        implementation of transition matrix of DPT.

        Builds a symmetrically normalised kernel matrix from the euclidean
        k-nearest-neighbour graph of ``self.data`` and computes its
        eigen-decomposition. Stores ``self.M``, ``self.mevals``,
        ``self.mevecs`` and ``self.indices`` as side effects.

        :param k: number of each node's neighbors.
        :return: the dense normalised matrix ``M``.
        """

        # kNN
        # NOTE(review): N is read from self.shape, not self.data.shape --
        # presumably both describe the same number of rows; confirm.
        N = self.shape[0]
        nbrs = neighbors.NearestNeighbors(n_neighbors=k,
                                          metric='euclidean').fit(self.data)
        distances, indices = nbrs.kneighbors(self.data)
        sqdistances = np.square(distances)
        # Per-point bandwidth: half the distance to the last returned
        # neighbour.
        sigmas = distances[:, -1] / 2

        # kernel matrix: for neighbour j of point i,
        #   sqrt(2 * s_i * s_j / (s_i^2 + s_j^2))
        #   * exp(-d_ij^2 / (s_i^2 + s_j^2))
        sigs_sum = np.add.outer(sigmas**2, sigmas**2)
        sig_mul = np.multiply.outer(sigmas, sigmas)
        kernel_matrix = np.zeros((N, k))
        for i in range(N):
            para = np.sqrt(
                np.divide(2 * sig_mul[i, indices[i, :]],
                          sigs_sum[i, indices[i, :]]))
            kern = np.exp(-np.divide(sqdistances[i, :],
                                     (sigs_sum[i, indices[i, :]])))  # not *2
            kernel_matrix[i, :] = np.multiply(para, kern)

        # Scatter the (N, k) weights into a dense (N, N) matrix via CSR,
        # with exactly k entries per row.
        weights = kernel_matrix
        indptr = range(0, (N + 1) * k, k)
        weight_matrix = sparse.csr_matrix(
            (weights.flatten(), indices.flatten(), indptr),
            shape=(N, N)).toarray()

        # symmetric: when j is a neighbour of i but i is not a neighbour of
        # j, copy the weight to the mirrored entry.
        for i, row in enumerate(indices):
            for j in row:
                if i not in set(indices[j]):
                    weight_matrix[j, i] = weight_matrix[i, j]

        # normalization: scale by the inverse square root of the column sums
        # on both sides.
        weight_sum = np.power(weight_matrix.sum(axis=0)[:, None],
                              -1 / 2).flatten()
        weight_sum = np.diag(weight_sum)
        M = weight_sum @ weight_matrix @ weight_sum
        mevals, mevecs = sp.linalg.eigh(M)

        self.M = M
        self.mevals = mevals
        self.mevecs = mevecs
        self.indices = indices

        return M
Пример #28
0
def _apply_mask_and_get_affinity(seeds,
                                 niimg,
                                 radius,
                                 allow_overlap,
                                 mask_img=None):
    """Extract the voxel signals and build the seed-to-voxel sphere matrix.

    Returns ``(X, A)``: the signal matrix (masked when `mask_img` is given,
    otherwise every voxel of the image) and a LIL sparse matrix with one row
    per seed marking the voxels within `radius` of that seed. Raises
    ``ValueError`` when `allow_overlap` is false and a voxel belongs to two
    or more spheres.
    """
    seeds = list(seeds)
    affine = niimg.get_affine()

    # Collect the index triplets of all candidate voxels and their signals.
    if mask_img is None:
        # No mask: every voxel of the 3D grid is a candidate.
        voxel_indices = list(zip(*np.ndindex(niimg.shape[:3])))
        X = niimg.get_data().reshape([-1, niimg.shape[3]]).T
    else:
        mask_img = check_niimg_3d(mask_img)
        mask_img = image.resample_img(mask_img,
                                      target_affine=affine,
                                      target_shape=niimg.shape[:3],
                                      interpolation='nearest')
        mask, _ = masking._load_mask_img(mask_img)
        voxel_indices = list(np.where(mask != 0))

        X = masking._apply_mask_fmri(niimg, mask_img)

    # Map the voxel indices through the affine, one row per voxel.
    voxel_indices = np.asarray(voxel_indices)
    transformed = coord_transform(voxel_indices[0], voxel_indices[1],
                                  voxel_indices[2], affine)
    mask_coords = np.asarray(transformed).T

    old_sklearn = (radius is not None and
                   LooseVersion(sklearn.__version__) < LooseVersion('0.16'))
    if old_sklearn:
        # Fix for scikit learn versions below 0.16. See
        # https://github.com/scikit-learn/scikit-learn/issues/4072
        radius += 1e-6

    finder = neighbors.NearestNeighbors(radius=radius).fit(mask_coords)
    A = finder.radius_neighbors_graph(seeds).tolil()

    # Make sure each seed's own voxel is part of its sphere.
    mask_coords = mask_coords.astype(int).tolist()
    for row, seed in enumerate(seeds):
        try:
            A[row, mask_coords.index(seed)] = True
        except ValueError:
            # seed is not in the mask
            pass

    if not allow_overlap and np.any(A.sum(axis=0) >= 2):
        raise ValueError('Overlap detected between spheres')

    return X, A
Пример #29
0
def get_positive_neighbors_counts(X, y, k=None, radius=None, positive_label=1):
    """Count positive-labelled neighbours around every positive sample.

    Exactly one of `k` (int) or `radius` (float) must be given. For each
    sample whose label equals `positive_label`, the neighbours (excluding
    the sample itself) are looked up and the number and proportion of
    positive neighbours are computed. Empty neighbourhoods get proportion 0.

    Returns:
        A 3-tuple: an iterator of ``(sample index, positive neighbour count,
        positive neighbour proportion)`` triples, the average count and the
        average proportion.
    """
    assert bool(k) ^ bool(radius)

    if k:
        assert type(k) is int
        # One extra neighbour, because the query point itself is returned.
        nn = neighbors.NearestNeighbors(n_neighbors=k + 1, algorithm="auto").fit(X)
    else:
        assert type(radius) is float
        nn = neighbors.NearestNeighbors(radius=radius, algorithm="auto").fit(X)

    pos_X_indices = np.where(y == positive_label)[0]
    positive_X = X[pos_X_indices]
    label_of = np.vectorize(lambda idx: y[idx])

    if k:
        neigh = nn.kneighbors(positive_X, return_distance=False)
        neigh = neigh[:, 1:]  # remove self from neighbors
        neigh_targets = label_of(neigh)
        n_rows, n_cols = np.shape(neigh_targets)
        neigh_counts = np.full((n_rows,), float(n_cols), dtype=float)
        pos_neigh_counts = np.sum(neigh_targets == positive_label, axis=1)
    else:
        neigh = nn.radius_neighbors(positive_X, return_distance=False)
        # Lists instead of map(): on Python 3 map() is lazy and single-use,
        # which breaks np.array(...) and the second pass over the targets.
        neigh = [np.delete(found, np.where(found == pos_X_indices[row]))
                 for row, found in enumerate(neigh)]  # remove self from neighbors
        # np.vectorize cannot be called on empty input, hence the size guard.
        neigh_targets = [label_of(found) if found.size else np.array([])
                         for found in neigh]
        neigh_counts = np.array([float(np.shape(targets)[0])
                                 for targets in neigh_targets], dtype=float)
        pos_neigh_counts = np.array([np.sum(targets == positive_label)
                                     for targets in neigh_targets])

    pos_neigh_proportions = pos_neigh_counts / neigh_counts
    pos_neigh_proportions = np.nan_to_num(pos_neigh_proportions)  # empty neighborhoods to 0

    # Empty neighbourhoods are kept in both averages (with proportion 0).
    avg_pos_neigh_count = np.average(pos_neigh_counts)
    avg_pos_neigh_prop = np.average(pos_neigh_proportions)
    # iter(zip(...)) yields a one-pass iterator on both Python 2 and 3.
    triples = iter(zip(pos_X_indices, pos_neigh_counts, pos_neigh_proportions))
    return triples, avg_pos_neigh_count, avg_pos_neigh_prop
Пример #30
0
def simplify_co_occurrence(co, nn=3):
    """Keep the quantile class of `co` only on its `nn`-nearest-neighbour graph.

    Returns the element-wise product of the quantile class ids of `co`
    (reshaped to ``co.shape``) and the dense k-neighbours connectivity
    matrix of `co`.
    """
    # Quantile class ids (crude, still need to account for upper/diagonals).
    quantile_ids = np.reshape(ps.Quantiles(co).yb, co.shape)

    # Connectivity of the nearest-neighbours graph over the rows of `co`.
    finder = neighbors.NearestNeighbors(n_neighbors=nn).fit(co)
    connectivity = finder.kneighbors_graph(co).toarray()

    return np.multiply(quantile_ids, connectivity)