示例#1
0
def joint_information(x, y, n_neighbors=3, random_noise=0.3):
    n_samples = x.size

    if random_noise:
        x = with_added_white_noise(x, random_noise)
        y = with_added_white_noise(y, random_noise)

    x = x.reshape((-1, 1))
    y = y.reshape((-1, 1))
    xy = np.hstack((x, y))

    # Here we rely on NearestNeighbors to select the fastest algorithm.
    nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)

    nn.fit(xy)
    radius = nn.kneighbors()[0]
    radius = np.nextafter(radius[:, -1], 0)

    # Algorithm is selected explicitly to allow passing an array as radius
    # later (not all algorithms support this).
    nn.set_params(algorithm='kd_tree')

    nn.fit(x)
    ind = nn.radius_neighbors(radius=radius, return_distance=False)
    nx = np.array([i.size for i in ind])

    nn.fit(y)
    ind = nn.radius_neighbors(radius=radius, return_distance=False)
    ny = np.array([i.size for i in ind])

    mi = (digamma(n_samples) + digamma(n_neighbors) -
          np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))

    return max(0, mi)
示例#2
0
def kmeanspp(X, k, seed):
    # That we need to do this is a bug in _init_centroids
    x_squared_norms = row_norms(X, squared=True)
    # Use k-means++ to initialise the centroids
    centroids = _init_centroids(X, k, 'k-means++', random_state=seed, x_squared_norms=x_squared_norms)
    # OK, we should just short-circuit and get these from k-means++...
    # quick and dirty solution
    nns = NearestNeighbors()
    nns.fit(X)
    centroid_candidatess = nns.radius_neighbors(X=centroids, radius=0, return_distance=False)
    # Account for "degenerated" solutions: serveral voxels at distance 0, each becoming a centroid
    centroids = set()
    for centroid_candidates in centroid_candidatess:
        centroid_candidates = set(centroid_candidates) - centroids
        if len(set(centroid_candidates) - centroids) == 0:
            raise Exception('Cannot get an unambiguous set of centers;'
                            'theoretically this cannot happen, so check for bugs')
        centroids.add(centroid_candidates.pop())
    return np.array(sorted(centroids))