Example #1
def test_sinkhorn_variants():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
    Ges = ot.sinkhorn(u,
                      u,
                      M,
                      1,
                      method='sinkhorn_epsilon_scaling',
                      stopThr=1e-10)
    Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)
    G_green = ot.sinkhorn(u, u, M, 1, method='greenkhorn', stopThr=1e-10)

    # check values
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
    np.testing.assert_allclose(G0, Ges, atol=1e-05)
    np.testing.assert_allclose(G0, Gerr)
    np.testing.assert_allclose(G0, G_green, atol=1e-5)
    print(G0, G_green)
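Note: `method='do_not_exists'` relies on older POT releases, where an unknown method name falls back to classic Sinkhorn-Knopp after a warning (which is why `Gerr` is expected to match `G0`). Current releases raise `ValueError` for unknown methods, as Example #3 below checks, so on those versions the call must be wrapped (the same applies to the `Gerr` line in Example #32):

# A sketch for newer POT versions, where an unknown method raises:
with pytest.raises(ValueError):
    ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)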
Example #2
def test_sinkhorn_empty():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10,
                         method='sinkhorn_stabilized', verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn(
        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling',
        verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    # test empty weights greenkhorn
    ot.sinkhorn([], [], M, 1, method='greenkhorn', stopThr=1e-10, log=True)
Example #3
def test_not_implemented_method():
    # test sinkhorn
    w = 10
    n = w**2
    rng = np.random.RandomState(42)
    A_img = rng.rand(2, w, w)
    A_flat = A_img.reshape(n, 2)
    a1, a2 = A_flat.T
    M_flat = ot.utils.dist0(n)
    not_implemented = "new_method"
    reg = 0.01
    with pytest.raises(ValueError):
        ot.sinkhorn(a1, a2, M_flat, reg, method=not_implemented)
    with pytest.raises(ValueError):
        ot.sinkhorn2(a1, a2, M_flat, reg, method=not_implemented)
    with pytest.raises(ValueError):
        ot.barycenter(A_flat, M_flat, reg, method=not_implemented)
    with pytest.raises(ValueError):
        ot.bregman.barycenter_debiased(A_flat,
                                       M_flat,
                                       reg,
                                       method=not_implemented)
    with pytest.raises(ValueError):
        ot.bregman.convolutional_barycenter2d(A_img,
                                              reg,
                                              method=not_implemented)
    with pytest.raises(ValueError):
        ot.bregman.convolutional_barycenter2d_debiased(A_img,
                                                       reg,
                                                       method=not_implemented)
Example #4
def test_sinkhorn_empty():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10, verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn([], [], M, 1, stopThr=1e-10,
                         method='sinkhorn_stabilized', verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)

    G, log = ot.sinkhorn(
        [], [], M, 1, stopThr=1e-10, method='sinkhorn_epsilon_scaling',
        verbose=True, log=True)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1), atol=1e-05)
    np.testing.assert_allclose(u, G.sum(0), atol=1e-05)
Example #5
def test_lazy_empirical_sinkhorn():
    # test sinkhorn
    n = 10
    a = ot.unif(n)
    b = ot.unif(n)
    numIterMax = 1000

    X_s = np.reshape(np.arange(n), (n, 1))
    X_t = np.reshape(np.arange(0, n), (n, 1))
    M = ot.dist(X_s, X_t)
    M_m = ot.dist(X_s, X_t, metric='minkowski')

    f, g = ot.bregman.empirical_sinkhorn(X_s,
                                         X_t,
                                         1,
                                         numIterMax=numIterMax,
                                         isLazy=True,
                                         batchSize=(1, 3),
                                         verbose=True)
    G_sqe = np.exp(f[:, None] + g[None, :] - M / 1)
    sinkhorn_sqe = ot.sinkhorn(a, b, M, 1)

    f, g, log_es = ot.bregman.empirical_sinkhorn(X_s,
                                                 X_t,
                                                 0.1,
                                                 numIterMax=numIterMax,
                                                 isLazy=True,
                                                 batchSize=1,
                                                 log=True)
    G_log = np.exp(f[:, None] + g[None, :] - M / 0.1)
    sinkhorn_log, log_s = ot.sinkhorn(a, b, M, 0.1, log=True)

    f, g = ot.bregman.empirical_sinkhorn(X_s,
                                         X_t,
                                         1,
                                         metric='minkowski',
                                         numIterMax=numIterMax,
                                         isLazy=True,
                                         batchSize=1)
    G_m = np.exp(f[:, None] + g[None, :] - M_m / 1)
    sinkhorn_m = ot.sinkhorn(a, b, M_m, 1)

    loss_emp_sinkhorn, log = ot.bregman.empirical_sinkhorn2(
        X_s, X_t, 1, numIterMax=numIterMax, isLazy=True, batchSize=1, log=True)
    loss_sinkhorn = ot.sinkhorn2(a, b, M, 1)

    # check constraints
    np.testing.assert_allclose(sinkhorn_sqe.sum(1), G_sqe.sum(1),
                               atol=1e-05)  # squared Euclidean metric
    np.testing.assert_allclose(sinkhorn_sqe.sum(0), G_sqe.sum(0),
                               atol=1e-05)  # squared Euclidean metric
    np.testing.assert_allclose(sinkhorn_log.sum(1), G_log.sum(1),
                               atol=1e-05)  # log
    np.testing.assert_allclose(sinkhorn_log.sum(0), G_log.sum(0),
                               atol=1e-05)  # log
    np.testing.assert_allclose(sinkhorn_m.sum(1), G_m.sum(1),
                               atol=1e-05)  # Minkowski metric
    np.testing.assert_allclose(sinkhorn_m.sum(0), G_m.sum(0),
                               atol=1e-05)  # Minkowski metric
    np.testing.assert_allclose(loss_emp_sinkhorn, loss_sinkhorn, atol=1e-05)
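With `isLazy=True`, `empirical_sinkhorn` never materializes the full transport plan: it returns the dual potentials `f` and `g`, and the dense plan is recovered on demand as `np.exp(f[:, None] + g[None, :] - M / reg)`, which is exactly what the `G_sqe`, `G_log`, and `G_m` lines above do.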
Example #6
def test_sinkhorn_variants_multi_b(nx):
    # test sinkhorn
    n = 50
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    b = rng.rand(n, 3)
    b = b / np.sum(b, 0, keepdims=True)

    M = ot.dist(x, x)

    ub, bb, M_nx = nx.from_numpy(u, b, M)

    G = ot.sinkhorn(u, b, M, 1, method='sinkhorn', stopThr=1e-10)
    Gl = nx.to_numpy(
        ot.sinkhorn(ub, bb, M_nx, 1, method='sinkhorn_log', stopThr=1e-10))
    G0 = nx.to_numpy(
        ot.sinkhorn(ub, bb, M_nx, 1, method='sinkhorn', stopThr=1e-10))
    Gs = nx.to_numpy(
        ot.sinkhorn(ub,
                    bb,
                    M_nx,
                    1,
                    method='sinkhorn_stabilized',
                    stopThr=1e-10))

    # check values
    np.testing.assert_allclose(G, G0, atol=1e-05)
    np.testing.assert_allclose(G, Gl, atol=1e-05)
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
Example #7
def computeTransportLaplacianSymmetric_fw_sinkhorn(distances, Ss, St, xs, xt, reg=1e-9, regls=0, reglt=0, nbitermax=400,
                                                   thr_stop=1e-8, **kwargs):
    distribS = np.ones((xs.shape[0], 1)) / xs.shape[0]
    distribS = distribS.ravel()
    distribT = np.ones((xt.shape[0], 1)) / xt.shape[0]
    distribT = distribT.ravel()

    Ls = get_laplacian(Ss)
    Lt = get_laplacian(St)

    maxdist = np.max(distances)

    regmax = 300. / maxdist
    reg0 = regmax * (1 - exp(-reg / regmax))

    transp = ot.sinkhorn(distribS, distribT, distances, reg)

    niter = 1
    while True:
        old_transp = transp.copy()
        G = np.asarray(regls * get_gradient1(Ls, xt, old_transp) + reglt * get_gradient2(Lt, xs, old_transp))
        transp0 = ot.sinkhorn(distribS, distribT, distances + G, reg)
        E = transp0 - old_transp

        # do a line search for best tau
        def f(tau):
            T = (1 - tau) * old_transp + tau * transp0
            # print np.sum(T*distances),-1./reg0*np.sum(T*np.log(T)),regls*quadloss1(T,Ls,xt),reglt*quadloss2(T,Lt,xs)
            return np.sum(T * distances) + 1. / reg0 * np.sum(T * np.log(T)) + \
                   regls * quadloss1(T, Ls, xt) + reglt * quadloss2(T, Lt, xs)

        # compute f'(0)
        res = regls * (np.trace(np.dot(xt.T, np.dot(E.T, np.dot(Ls, np.dot(old_transp, xt))))) + \
                       np.trace(np.dot(xt.T, np.dot(old_transp.T, np.dot(Ls, np.dot(E, xt)))))) \
              + reglt * (np.trace(np.dot(xs.T, np.dot(E, np.dot(Lt, np.dot(old_transp.T, xs))))) + \
                         np.trace(np.dot(xs.T, np.dot(old_transp, np.dot(Lt, np.dot(E.T, xs))))))

        # derphi_zero = np.sum(E*distances) - np.sum(1+E*np.log(old_transp))/reg + res
        derphi_zero = np.sum(E * distances) + np.sum(E * (1 + np.log(old_transp))) / reg0 + res

        tau, cost = ln.scalar_search_armijo(f, f(0), derphi_zero, alpha0=0.99)

        if tau is None:
            break
        transp = (1 - tau) * old_transp + tau * transp0

        err = np.sum(np.fabs(E))

        if niter >= nbitermax or err < thr_stop:
            break
        niter += 1

        if niter % 100 == 0:
            print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
            print('{:5d}|{:8e}|'.format(niter, err))

    return transp
Example #8
def test_sinkhorn_identity():

    x = tf.constant([[3.2, 1.0, 1.0], [1.0, 1.4, 1.4]], dtype=tf.float32)

    # the sinkhorn distance of a point set to itself should vanish
    p1 = sinkhorn(x, x).numpy()
    p2 = sinkhorn(x, x).numpy()

    assert abs(p1) < 1e-8
    assert abs(p2) < 1e-8
Example #9
def test_sinkhorn_symmetry():
    x = tf.constant([[3.2, 1.0, 1.0], [1.0, 1.4, 1.4]], dtype=tf.float32)
    y = tf.constant([[8.9, 12.0, 15.0], [11.0, 12.7, 13.4], [19.0, 13.0, 14.4],
                     [21.0, 5.0, 14.2]])
    s1 = sinkhorn(x, y).numpy()
    s2 = sinkhorn(y, x).numpy()
    print("s1", s1)
    print("s2", s2)
    assert abs(s1 - s2) < 1e-8
Example #10
def test_nan_warning(method):
    # test sinkhorn
    n = 100
    a1 = ot.datasets.make_1D_gauss(n, m=30, s=10)
    a2 = ot.datasets.make_1D_gauss(n, m=40, s=10)

    M = ot.utils.dist0(n)
    reg = 0
    with pytest.warns(UserWarning):
        # warn set to False to avoid catching a convergence warning instead
        ot.sinkhorn(a1, a2, M, reg, method=method, warn=False)
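With `reg=0` the Gibbs kernel `np.exp(-M / reg)` is undefined (division by zero), so the scaling iterations produce NaNs and POT emits a `UserWarning`; `warn=False` disables the ordinary convergence warning so the NaN warning is the one being caught.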
Example #11
def test_dual_sgd_sinkhorn():
    # test all dual algorithms
    n = 10
    reg = 1
    nb_iter = 15000
    batch_size = 10
    rng = np.random.RandomState(0)

    # Test uniform
    x = rng.randn(n, 2)
    u = ot.utils.unif(n)
    M = ot.dist(x, x)

    G_sgd = ot.stochastic.solve_dual_entropic(u,
                                              u,
                                              M,
                                              reg,
                                              batch_size,
                                              numItermax=nb_iter)

    G_sinkhorn = ot.sinkhorn(u, u, M, reg)

    # check constraints
    np.testing.assert_allclose(G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_sgd, G_sinkhorn,
                               atol=1e-03)  # cf convergence sgd

    # Test gaussian
    n = 30
    reg = 1
    batch_size = 30

    a = ot.datasets.make_1D_gauss(n, 15, 5)  # m= mean, s= std
    b = ot.datasets.make_1D_gauss(n, 15, 5)
    X_source = np.arange(n, dtype=np.float64)
    Y_target = np.arange(n, dtype=np.float64)
    M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
    M /= M.max()

    G_sgd = ot.stochastic.solve_dual_entropic(a,
                                              b,
                                              M,
                                              reg,
                                              batch_size,
                                              numItermax=nb_iter)

    G_sinkhorn = ot.sinkhorn(a, b, M, reg)

    # check constraints
    np.testing.assert_allclose(G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_sgd, G_sinkhorn,
                               atol=1e-03)  # cf convergence sgd
Example #12
def test_sinkhorn_variants_log():
    # test sinkhorn
    n = 50
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G0, log0 = ot.sinkhorn(u,
                           u,
                           M,
                           1,
                           method='sinkhorn',
                           stopThr=1e-10,
                           log=True)
    Gl, logl = ot.sinkhorn(u,
                           u,
                           M,
                           1,
                           method='sinkhorn_log',
                           stopThr=1e-10,
                           log=True)
    Gs, logs = ot.sinkhorn(u,
                           u,
                           M,
                           1,
                           method='sinkhorn_stabilized',
                           stopThr=1e-10,
                           log=True)
    Ges, loges = ot.sinkhorn(
        u,
        u,
        M,
        1,
        method='sinkhorn_epsilon_scaling',
        stopThr=1e-10,
        log=True,
    )
    G_green, loggreen = ot.sinkhorn(u,
                                    u,
                                    M,
                                    1,
                                    method='greenkhorn',
                                    stopThr=1e-10,
                                    log=True)

    # check values
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
    np.testing.assert_allclose(G0, Gl, atol=1e-05)
    np.testing.assert_allclose(G0, Ges, atol=1e-05)
    np.testing.assert_allclose(G0, G_green, atol=1e-5)
Example #13
def objective(X, Y, R, n=1000):
    if n > len(X):
        n = len(X)
    Xn, Yn = X[:n], Y[:n]
    C = -np.dot(np.dot(Xn, R), Yn.T)
    P = ot.sinkhorn(np.ones(n), np.ones(n), C, 0.025, stopThr=1e-3)
    return 1000 * np.linalg.norm(np.dot(Xn, R) - np.dot(P, Yn)) / n
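`objective` measures the Procrustes-Wasserstein alignment error: it builds the cost `C = -Xn R Yn.T`, solves an entropic OT plan `P` with `ot.sinkhorn` under uniform unit weights, and returns the scaled residual `1000 * ||Xn R - P Yn|| / n`.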
Example #14
def procwass_loss(mapped, target, R, device='cpu', reg=0.025):
    # # use Procrustes Wasserstein loss.
    # Ux, Sx, VTx = torch.linalg.svd(self.Rx)
    # self.Rx = torch.mm(Ux, VTx)
    # pw_loss_x = procwass_loss(y_mapped[y_intersect],
    #                           self.x_emb.weight[x_intersect],
    #                           self.Rx, device=self.device,
    #                           reg=20)
    # losses['pw_x'] = pw_loss_x
    # Uy, Sy, VTy = torch.linalg.svd(self.Ry)
    # self.Ry = torch.mm(Uy, VTy)
    # pw_loss_y = procwass_loss(x_mapped[x_intersect],
    #                           self.y_emb.weight[y_intersect],
    #                           self.Ry, device=self.device,
    #                           reg=20)
    # losses['pw_y'] = pw_loss_y
    #

    #  Procrustes / Wasserstein loss
    # R must be saved over epochs and initialised by calling convex_init
    # mapped, target must be same size
    C = -torch.mm(torch.mm(mapped, R), target.T)
    n = mapped.shape[0]
    onesn = torch.ones(n).to(device)
    P = ot.sinkhorn(onesn, onesn, C.detach(), reg, stopThr=1e-3)
    loss = (1000 *
            torch.linalg.norm(torch.mm(mapped, R) - torch.mm(P, target)) / n)
    return loss
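Note the `C.detach()`: the coupling `P` is computed outside the autograd graph, so the loss backpropagates only through the `torch.mm` terms while `P` is treated as a constant, the usual alternating scheme for Procrustes-Wasserstein training.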
Example #15
def OT_Resampling(particles, weights, size, reg=.1):
    # pairwise squared-distance cost matrix between particles
    M = np.array([[np.linalg.norm(xi - xj)**2 for xi in particles]
                  for xj in particles])
    row = np.array([1 / size] * size)
    weight_matrix = ot.sinkhorn(row, weights, M, reg=reg)
    weight_matrix = weight_matrix * size
    # NB: `random` is assumed to be numpy.random here; the stdlib
    # random.choice has no `p` argument
    try:
        indices = [
            random.choice(range(len(particles)), p=w) for w in weight_matrix
        ]
    except Exception:
        try:
            # shift and renormalize each row into a probability vector
            weight_matrix = np.apply_along_axis(
                lambda x: x - np.min(np.min(x), 0), axis=1, arr=weight_matrix)
            weight_matrix = np.apply_along_axis(lambda x: x / np.sum(x),
                                                axis=1,
                                                arr=weight_matrix)
            indices = [
                random.choice(range(len(particles)), p=w)
                for w in weight_matrix
            ]
        except Exception:
            # fall back to multinomial resampling from the input weights
            indices = [
                random.choice(range(len(particles)), p=weights)
                for _ in range(size)
            ]
    res = np.array([particles[i] for i in indices])
    return res
Example #16
def test_gpu_old_doctests():
    a = [.5, .5]
    b = [.5, .5]
    M = [[0., 1.], [1., 0.]]
    G = ot.sinkhorn(a, b, M, 1)
    np.testing.assert_allclose(G, np.array([[0.36552929, 0.13447071],
                                            [0.13447071, 0.36552929]]))
Example #17
def OT_scores_sinkhorn(Xl, Xr, scoresl, scoresr, mu=0.5, lambd=1e-2):
    # loss matrix
    C = ot.dist(Xl, Xr) + mu * ot.dist(np.expand_dims(scoresl, axis=1),
                                       np.expand_dims(scoresr, axis=1))
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    a, b = scoresl, scoresr

    Gs = ot.sinkhorn(a, b, M, lambd)

    plt.figure(5)
    plt.imshow(Gs, interpolation='nearest')
    plt.title('OT matrix sinkhorn')

    plt.figure(6)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], Gs, color=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix Sinkhorn with samples')

    return Gs
Example #18
def OT_sinkhorn(Xl, Xr, hl, hr, ll=1.0, lambd=1e-2):
    # loss matrix; hl, hr are per-side feature arrays and ll their weight,
    # taken here as explicit arguments since they are not defined in this scope
    C = ot.dist(Xl, Xr) + ll * ot.dist(hl, hr)
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    a, b = np.ones((n, )) / n, np.ones((m, )) / m

    Gs = ot.sinkhorn(a, b, M, lambd)

    plt.figure(5)
    plt.imshow(Gs, interpolation='nearest')
    plt.title('OT matrix sinkhorn')

    plt.figure(6)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], Gs, color=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix Sinkhorn with samples')

    return Gs
Example #19
def _compute_wasserstein_distance(label_sequences, sinkhorn=False,
                                  categorical=False, sinkhorn_lambda=1e-2):
    '''
    Generate the Wasserstein distance matrix for the graphs embedded
    in label_sequences.
    '''
    # Number of graphs
    n = len(label_sequences)
    ground_distance = 'hamming' if categorical else 'euclidean'

    M = np.zeros((n, n))
    # Iterate over pairs of graphs (upper triangle only)
    for graph_index_1, labels_1 in enumerate(label_sequences):
        for graph_index_2, labels_2 in enumerate(
                label_sequences[graph_index_1:], start=graph_index_1):
            # Get cost matrix
            costs = ot.dist(labels_1, labels_2, metric=ground_distance)

            if sinkhorn:
                mat = ot.sinkhorn(np.ones(len(labels_1)) / len(labels_1),
                                  np.ones(len(labels_2)) / len(labels_2),
                                  costs, sinkhorn_lambda,
                                  numItermax=50)
                M[graph_index_1, graph_index_2] = np.sum(
                    np.multiply(mat, costs))
            else:
                M[graph_index_1, graph_index_2] = ot.emd2([], [], costs)

    M = M + M.T  # symmetrize: only the upper triangle was filled
    return M
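The double loop fills only the upper triangle (including the diagonal), so `M + M.T` symmetrizes the result. This doubles the diagonal, which is exactly zero for `ot.emd2` on identical label sequences but only approximately zero under the entropic Sinkhorn approximation.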
Example #20
def test_sinkhorn_multi_b(method, verbose, warn):
    # test sinkhorn
    n = 10
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    b = rng.rand(n, 3)
    b = b / np.sum(b, 0, keepdims=True)

    M = ot.dist(x, x)

    loss0, log = ot.sinkhorn(u,
                             b,
                             M,
                             .1,
                             method=method,
                             stopThr=1e-10,
                             log=True)

    loss = [
        ot.sinkhorn2(u,
                     b[:, k],
                     M,
                     .1,
                     method=method,
                     stopThr=1e-10,
                     verbose=verbose,
                     warn=warn) for k in range(3)
    ]
    # check constraints
    np.testing.assert_allclose(loss0, loss,
                               atol=1e-4)  # cf convergence sinkhorn
Example #21
def predict(X_test, W, Y, X, M, reg, knn_number=10):
    """
    Calculate h(X), then bi-cluster it; determine which cluster is closest to
    X_test, then average the k nearest neighbours from that cluster as the
    final label.
    :param X_test: the test features, dimension n*d
    :param W: the weights of h(x), dimension L*d
    :param X: the train features, dimension m*d
    :param Y: the train labels, dimension m*L
    :return: Y_test
    """
    temp_HX = np.exp(np.dot(W, X.T))
    temp_HX_sum = np.sum(temp_HX, axis=0).reshape(1, -1)
    H_X = temp_HX/temp_HX_sum  #dimension: L*m
    H_X = H_X.T #dimension: m*L

    test_HX = np.exp(np.dot(W, X_test.T))
    test_HX_sum = np.sum(test_HX, axis=0).reshape(1, -1)
    pre_H_X = test_HX/test_HX_sum
    pre_H_X = pre_H_X.T #dimension: n*L
    pre_y = []
    weight = list(range(2*(knn_number-1), -1, -2))
    weight = [t/(knn_number*(knn_number-1)) for t in weight]
    for i in range(pre_H_X.shape[0]):
        pre_label = 0
        pre_temp = pre_H_X[i]
        # NB: this expects a sinkhorn variant returning (loss, coupling, u, v);
        # POT's ot.sinkhorn itself returns only the coupling (plus an optional log)
        ot_loss, couple, u, v = ot.sinkhorn(pre_temp, H_X.T, M, reg)
        dis_index = np.argsort(ot_loss)
        for j in range(knn_number):
            pre_label = pre_label + weight[j]*Y[dis_index[j]]
        pre_y.append(pre_label)
    pre_y = np.array(pre_y)
    return pre_y
Example #22
def align(X,
          Y,
          R,
          lr=10.,
          bsz=200,
          nepoch=5,
          niter=1000,
          nmax=10000,
          reg=0.05,
          verbose=True):
    for epoch in range(1, nepoch + 1):
        for _it in range(1, niter + 1):
            # sample mini-batch
            xt = X[np.random.permutation(nmax)[:bsz], :]
            yt = Y[np.random.permutation(nmax)[:bsz], :]
            # compute OT on minibatch
            C = -np.dot(np.dot(xt, R), yt.T)
            P = ot.sinkhorn(np.ones(bsz), np.ones(bsz), C, reg, stopThr=1e-3)
            # compute gradient
            G = -np.dot(xt.T, np.dot(P, yt))
            R -= lr / bsz * G
            # project on orthogonal matrices
            U, s, VT = np.linalg.svd(R)
            R = np.dot(U, VT)
        bsz *= 2
        niter //= 4
        if verbose:
            print("epoch: %d  obj: %.3f" % (epoch, objective(X, Y, R)))
    return R
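The SVD step is the orthogonal Procrustes projection: `np.dot(U, VT)` is the matrix in the orthogonal group closest to the updated `R` in Frobenius norm, so each gradient step on `R` is followed by a projection back onto orthogonal matrices.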
Example #23
def parallel_Distance(para):

    dis_to_center = []
    for sample_id in range(para['m']):
        # compute the Wasserstein distance from this sample to the center
        lo = para['posvec'][sample_id]
        hi = para['posvec'][sample_id + 1]
        sample_prob = para['probs'][0, lo:hi]
        X = para['supps'][:, lo:hi]

        c_lo = para['center_id'] * para['n']
        c_hi = (para['center_id'] + 1) * para['n']
        center_prob = para['centers_probs'][0, c_lo:c_hi]
        Y = para['centers_supps'][:, c_lo:c_hi]

        # pairwise squared Euclidean costs between support columns:
        # ||x_i||^2 + ||y_j||^2 - 2 x_i . y_j
        ones = np.ones([para['stride'][0, sample_id], para['n']])
        cost_mat = (np.diag(np.diag(X.T.dot(X))).dot(ones) +
                    ones.dot(np.diag(np.diag(Y.T.dot(Y)))) -
                    2 * X.T.dot(Y))

        pi = ot.sinkhorn(sample_prob,
                         center_prob,
                         cost_mat,
                         para['otreg'],
                         stopThr=1e-3)
        dis_to_center.append(np.sum(pi * cost_mat))
    return dis_to_center
Example #24
def test_convergence_warning(method):
    # test sinkhorn
    n = 100
    a1 = ot.datasets.make_1D_gauss(n, m=30, s=10)
    a2 = ot.datasets.make_1D_gauss(n, m=40, s=10)
    A = np.asarray([a1, a2]).T
    M = ot.utils.dist0(n)

    with pytest.warns(UserWarning):
        ot.sinkhorn(a1, a2, M, 1., method=method, stopThr=0, numItermax=1)

    if method in ["sinkhorn", "sinkhorn_stabilized", "sinkhorn_log"]:
        with pytest.warns(UserWarning):
            ot.barycenter(A, M, 1, method=method, stopThr=0, numItermax=1)
        with pytest.warns(UserWarning):
            ot.sinkhorn2(a1, a2, M, 1, method=method, stopThr=0, numItermax=1)
Example #25
def test_sinkhorn():
    T_pot = ot.sinkhorn(a, b, M, epsilon, method='sinkhorn_stabilized')
    W_pot = np.sum(np.multiply(np.asarray(T_pot), M))
    W_our, *_, T_our = sinkhorn_iteration(Mt, at, bt, epsilon)

    assert diff_array_tensor(W_pot, W_our) < thr
    assert diff_array_tensor(T_pot, T_our) < thr
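This test assumes module-level fixtures defined elsewhere in its file: the histograms `a`, `b`, the cost matrix `M`, and `epsilon`, their tensor counterparts `at`, `bt`, `Mt`, the tolerance `thr`, and the helpers `sinkhorn_iteration` (the implementation under test) and `diff_array_tensor`.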
Example #26
def test_smooth_ot_semi_dual():

    # get data
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    with pytest.raises(NotImplementedError):
        Gl2, log = ot.smooth.smooth_ot_semi_dual(u, u, M, 1, reg_type='none')

    Gl2, log = ot.smooth.smooth_ot_semi_dual(u, u, M, 1, reg_type='l2', log=True, stopThr=1e-10)

    # check constraints
    np.testing.assert_allclose(
        u, Gl2.sum(1), atol=1e-05)  # cf convergence sinkhorn
    np.testing.assert_allclose(
        u, Gl2.sum(0), atol=1e-05)  # cf convergence sinkhorn

    # kl regularisation
    G = ot.smooth.smooth_ot_semi_dual(u, u, M, 1, reg_type='kl', stopThr=1e-10)

    # check constraints
    np.testing.assert_allclose(
        u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
    np.testing.assert_allclose(
        u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn

    G2 = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)
    np.testing.assert_allclose(G, G2, atol=1e-05)
Example #27
def test_gpu_sinkhorn():

    rng = np.random.RandomState(0)

    for n_samples in [50, 100, 500, 1000]:
        a = rng.rand(n_samples // 4, 100)
        b = rng.rand(n_samples, 100)

        wa = ot.unif(n_samples // 4)
        wb = ot.unif(n_samples)

        wb2 = rng.rand(n_samples, 20)
        wb2 /= wb2.sum(0, keepdims=True)

        M = ot.dist(a.copy(), b.copy())
        M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False)

        reg = 1

        G = ot.sinkhorn(wa, wb, M, reg)
        G1 = ot.gpu.sinkhorn(wa, wb, M, reg)

        np.testing.assert_allclose(G1, G, rtol=1e-10)

        # run all on gpu
        ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False, log=True)

        # run sinkhorn for multiple targets
        ot.gpu.sinkhorn(wa, wb2, M2, reg, to_numpy=False, log=True)
Example #28
def ot_extended(mu,
                nu,
                c,
                SS,
                S_coordinates,
                epsilon,
                log=False,
                optimizing=False):
    if SS is not None:
        cost = c + np.tensordot(SS, S_coordinates, [0, 0])
    else:
        cost = c

    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        try:
            res_ot = ot.sinkhorn(mu, nu, cost, epsilon, log=log)
        except Exception:
            if optimizing:
                print(cost)
                raise RuntimeError("optimization failed at ot_extended")
            raise  # re-raise so res_ot is never used unbound below

    pi = res_ot[0] if log else res_ot
    val = np.sum(pi * cost) - epsilon * ss.entropy(pi.flatten())
    if optimizing:
        grads = np.sum(SS * pi, axis=(1, 2))
        return val, grads
    else:
        return val, res_ot
Example #29
def test_sag_asgd_sinkhorn():
    # test all algorithms
    n = 15
    reg = 1
    nb_iter = 100000
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)
    M = ot.dist(x, x)

    G_asgd = ot.stochastic.solve_semi_dual_entropic(u,
                                                    u,
                                                    M,
                                                    reg,
                                                    "asgd",
                                                    numItermax=nb_iter)
    G_sag = ot.stochastic.solve_semi_dual_entropic(u,
                                                   u,
                                                   M,
                                                   reg,
                                                   "sag",
                                                   numItermax=nb_iter)
    G_sinkhorn = ot.sinkhorn(u, u, M, reg)

    # check constraints
    np.testing.assert_allclose(G_sag.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_sag.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_asgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
    np.testing.assert_allclose(G_asgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
    np.testing.assert_allclose(G_sag, G_sinkhorn,
                               atol=1e-03)  # cf convergence sag
    np.testing.assert_allclose(G_asgd, G_sinkhorn,
                               atol=1e-03)  # cf convergence asgd
Example #30
def test_dual_sgd_sinkhorn():
    # test all dual algorithms
    n = 10
    reg = 1
    nb_iter = 300000
    batch_size = 8
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)
    zero = np.zeros(n)
    M = ot.dist(x, x)

    G_sgd = ot.stochastic.solve_dual_entropic(u,
                                              u,
                                              M,
                                              reg,
                                              batch_size,
                                              numItermax=nb_iter)

    G_sinkhorn = ot.sinkhorn(u, u, M, reg)

    # check constraints
    np.testing.assert_allclose(zero, (G_sgd - G_sinkhorn).sum(1),
                               atol=1e-02)  # cf convergence sgd
    np.testing.assert_allclose(zero, (G_sgd - G_sinkhorn).sum(0),
                               atol=1e-02)  # cf convergence sgd
    np.testing.assert_allclose(G_sgd, G_sinkhorn,
                               atol=1e-02)  # cf convergence sgd
Example #31
def test_sinkhorn_backends(nx):
    n_samples = 100
    n_features = 2
    rng = np.random.RandomState(0)

    x = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples, n_features)
    a = ot.utils.unif(n_samples)

    M = ot.dist(x, y)

    G = ot.sinkhorn(a, a, M, 1)

    ab, M_nx = nx.from_numpy(a, M)

    Gb = ot.sinkhorn(ab, ab, M_nx, 1)

    np.testing.assert_allclose(G, nx.to_numpy(Gb))
Example #32
def test_sinkhorn_variants():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G0 = ot.sinkhorn(u, u, M, 1, method='sinkhorn', stopThr=1e-10)
    Gs = ot.sinkhorn(u, u, M, 1, method='sinkhorn_stabilized', stopThr=1e-10)
    Ges = ot.sinkhorn(
        u, u, M, 1, method='sinkhorn_epsilon_scaling', stopThr=1e-10)
    Gerr = ot.sinkhorn(u, u, M, 1, method='do_not_exists', stopThr=1e-10)

    # check values
    np.testing.assert_allclose(G0, Gs, atol=1e-05)
    np.testing.assert_allclose(G0, Ges, atol=1e-05)
    np.testing.assert_allclose(G0, Gerr)
Example #33
def convex_init(X, Y, niter=100, reg=0.05, apply_sqrt=False):
    n, d = X.shape
    if apply_sqrt:
        X, Y = sqrt_eig(X), sqrt_eig(Y)
    K_X, K_Y = np.dot(X, X.T), np.dot(Y, Y.T)
    K_Y *= np.linalg.norm(K_X) / np.linalg.norm(K_Y)
    K2_X, K2_Y = np.dot(K_X, K_X), np.dot(K_Y, K_Y)
    P = np.ones([n, n]) / float(n)
    for it in range(1, niter + 1):
        G = np.dot(P, K2_X) + np.dot(K2_Y, P) - 2 * np.dot(K_Y, np.dot(P, K_X))
        q = ot.sinkhorn(np.ones(n), np.ones(n), G, reg, stopThr=1e-3)
        alpha = 2.0 / float(2.0 + it)
        P = alpha * q + (1.0 - alpha) * P
    obj = np.linalg.norm(np.dot(P, K_X) - np.dot(K_Y, P))
    print(obj)
    return procrustes(np.dot(P, X), Y).T
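`convex_init` is a Frank-Wolfe loop over (approximately) doubly stochastic matrices: `G` is half the gradient of `||P K_X - K_Y P||_F^2`, the `ot.sinkhorn` call acts as an entropic linear minimization oracle over the transport polytope, and `alpha = 2 / (2 + it)` is the standard Frank-Wolfe step size.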
Example #34
def test_sinkhorn():
    # test sinkhorn
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G = ot.sinkhorn(u, u, M, 1, stopThr=1e-10)

    # check constraints
    np.testing.assert_allclose(
        u, G.sum(1), atol=1e-05)  # cf convergence sinkhorn
    np.testing.assert_allclose(
        u, G.sum(0), atol=1e-05)  # cf convergence sinkhorn
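For reference, the fixed point these marginal checks rely on can be written in a few lines. A minimal sketch of the scaling iteration behind `ot.sinkhorn` (not POT's implementation, which adds stopping criteria, logging, and stabilization):

import numpy as np

def sinkhorn_plan(a, b, M, reg, n_iter=1000):
    # scale u, v so that diag(u) @ K @ diag(v) has marginals a and b
    K = np.exp(-M / reg)      # Gibbs kernel
    v = np.ones_like(b)
    for _ in range(n_iter):
        u = a / (K @ v)       # enforce row sums == a
        v = b / (K.T @ u)     # enforce column sums == b
    return u[:, None] * K * v[None, :]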
Example #35
def jdot_krr(X, y, Xtest, gamma_g=1, numIterBCD=10, alpha=1, lambd=1e1,
             method='emd', reg=1, ktype='linear'):
    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa = np.ones((n,)) / n
    wb = np.ones((ntest,)) / ntest

    # original loss
    C0 = cdist(X, Xtest, metric='sqeuclidean')
    # print(np.max(C0))
    C0 = C0 / np.median(C0)

    # classifier
    g = classif.KRRClassifier(lambd)

    # compute kernels
    if ktype == 'rbf':
        Kt = sklearn.metrics.pairwise.rbf_kernel(Xtest, Xtest, gamma=gamma_g)
    else:
        Kt = sklearn.metrics.pairwise.linear_kernel(Xtest, Xtest)

    C = alpha * C0  # + cdist(y, ypred, metric='sqeuclidean')
    k = 0
    while k < numIterBCD:
        k = k + 1
        # OT step: transport plan for the current joint cost
        if method == 'sinkhorn':
            G = ot.sinkhorn(wa, wb, C, reg)
        if method == 'emd':
            G = ot.emd(wa, wb, C)

        # barycentric mapping of the source labels
        Yst = ntest * G.T.dot(y)

        g.fit(Kt, Yst)
        ypred = g.predict(Kt)

        # function cost
        fcost = cdist(y, ypred, metric='sqeuclidean')

        C = alpha * C0 + fcost

    return g, np.sum(G * fcost)
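`jdot_krr` is the block-coordinate descent used in JDOT (joint distribution optimal transport): with the predictor fixed it solves an OT plan `G` for the combined cost `alpha * C0 + fcost`; with `G` fixed it fits the kernel ridge regressor on the barycentrically transported labels `Yst = ntest * G.T.dot(y)`, and the two steps alternate for `numIterBCD` rounds.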
Example #36
def align(X, Y, R, lr=10., bsz=200, nepoch=5, niter=1000,
          nmax=10000, reg=0.05, verbose=True):
    for epoch in range(1, nepoch + 1):
        for _it in range(1, niter + 1):
            # sample mini-batch
            xt = X[np.random.permutation(nmax)[:bsz], :]
            yt = Y[np.random.permutation(nmax)[:bsz], :]
            # compute OT on minibatch
            C = -np.dot(np.dot(xt, R), yt.T)
            P = ot.sinkhorn(np.ones(bsz), np.ones(bsz), C, reg, stopThr=1e-3)
            # compute gradient
            G = - np.dot(xt.T, np.dot(P, yt))
            R -= lr / bsz * G
            # project on orthogonal matrices
            U, s, VT = np.linalg.svd(R)
            R = np.dot(U, VT)
        bsz *= 2
        niter //= 4
        if verbose:
            print("epoch: %d  obj: %.3f" % (epoch, objective(X, Y, R)))
    return R
Example #37
def jdot_nn_l2(get_model, X, Y, Xtest, ytest=[], fit_params={},
               reset_model=True, numIterBCD=10, alpha=1, method='emd',
               reg=1, nb_epoch=100, batch_size=10):
    # get_model should return a new model compiled with l2 loss

    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa = np.ones((n,)) / n
    wb = np.ones((ntest,)) / ntest

    # original loss
    C0 = cdist(X, Xtest, metric='sqeuclidean')
    C0 = C0 / np.max(C0)

    # classifier
    g = get_model()

    TBR = []
    sav_fcost = []
    sav_totalcost = []

    results = {}

    # init initial g(.)
    g.fit(X, Y, **fit_params)
    ypred = g.predict(Xtest)

    C = alpha * C0 + cdist(Y, ypred, metric='sqeuclidean')

    # do it only if the final labels were given
    if len(ytest):
        ydec = np.argmax(ypred, 1) + 1
        TBR1 = np.mean(ytest == ydec)
        TBR.append(TBR1)

    k = 0
    changeLabels = False
    while k < numIterBCD:  # and not changeLabels
        k = k + 1
        if method == 'sinkhorn':
            G = ot.sinkhorn(wa, wb, C, reg)
        if method == 'emd':
            G = ot.emd(wa, wb, C)

        Yst = ntest * G.T.dot(Y)

        if reset_model:
            g = get_model()

        g.fit(Xtest, Yst, **fit_params)
        ypred = g.predict(Xtest)

        # function cost
        fcost = cdist(Y, ypred, metric='sqeuclidean')
        # pl.figure()
        # pl.imshow(fcost)
        # pl.show()

        C = alpha * C0 + fcost

        ydec_tmp = np.argmax(ypred, 1) + 1
        if k > 1:
            changeLabels = np.all(ydec_tmp == ydec)
            sav_fcost.append(np.sum(G * fcost))
            sav_totalcost.append(np.sum(G * (alpha * C0 + fcost)))

        ydec = ydec_tmp
        if len(ytest):
            TBR1 = np.mean((ytest - ypred) ** 2)
            TBR.append(TBR1)

    results['ypred0'] = ypred
    results['ypred'] = np.argmax(ypred, 1) + 1
    if len(ytest):
        results['mse'] = TBR
    results['clf'] = g
    results['fcost'] = sav_fcost
    results['totalcost'] = sav_totalcost
    return g, results
Example #38
def objective(X, Y, R, n=5000):
    Xn, Yn = X[:n], Y[:n]
    C = -np.dot(np.dot(Xn, R), Yn.T)
    P = ot.sinkhorn(np.ones(n), np.ones(n), C, 0.025, stopThr=1e-3)
    return 1000 * np.linalg.norm(np.dot(Xn, R) - np.dot(P, Yn)) / n
Example #39
ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.5, .5, 1])
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.legend(loc=0)
pl.title('OT matrix with samples')


##############################################################################
# Compute Sinkhorn
# ----------------

#%% sinkhorn

# reg term
lambd = 1e-3

Gs = ot.sinkhorn(a, b, M, lambd)

pl.figure(5)
pl.imshow(Gs, interpolation='nearest')
pl.title('OT matrix sinkhorn')

pl.figure(6)
ot.plot.plot2D_samples_mat(xs, xt, Gs, color=[.5, .5, 1])
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.legend(loc=0)
pl.title('OT matrix Sinkhorn with samples')

pl.show()
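This snippet is an excerpt from a larger POT example script: `xs`, `xt`, `G0`, and the histograms `a`, `b` with cost matrix `M` are assumed to be defined by the earlier setup of that script.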
Example #40
def jdot_svm(X, y, Xtest, ytest=[], gamma_g=1, numIterBCD=10, alpha=1,
             lambd=1e1, method='emd', reg_sink=1, ktype='linear'):
    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa = np.ones((n,)) / n
    wb = np.ones((ntest,)) / ntest

    # original loss
    C0 = cdist(X, Xtest, metric='sqeuclidean')

    # classifier
    g = classif.SVMClassifier(lambd)

    # compute kernels
    if ktype == 'rbf':
        Kt = sklearn.metrics.pairwise.rbf_kernel(Xtest, gamma=gamma_g)
        # Ks = sklearn.metrics.pairwise.rbf_kernel(X, gamma=gamma_g)
    else:
        Kt = sklearn.metrics.pairwise.linear_kernel(Xtest)
        # Ks = sklearn.metrics.pairwise.linear_kernel(X)

    TBR = []
    sav_fcost = []
    sav_totalcost = []

    results = {}
    ypred = np.zeros(y.shape)

    Chinge = np.zeros(C0.shape)
    C = alpha * C0 + Chinge

    # do it only if the final labels were given
    if len(ytest):
        TBR.append(np.mean(ytest == np.argmax(ypred, 1) + 1))

    k = 0
    while k < numIterBCD:
        k = k + 1
        if method == 'sinkhorn':
            G = ot.sinkhorn(wa, wb, C, reg_sink)
        if method == 'emd':
            G = ot.emd(wa, wb, C)

        if k > 1:
            sav_fcost.append(np.sum(G * Chinge))
            sav_totalcost.append(np.sum(G * (alpha * C0 + Chinge)))

        Yst = ntest * G.T.dot((y + 1) / 2.)
        # Yst = ntest * G.T.dot(y_f)
        g.fit(Kt, Yst)
        ypred = g.predict(Kt)

        Chinge = classif.loss_hinge(y, ypred)
        # Chinge = SVMclassifier.loss_hinge(y_f*2-1, ypred*2-1)

        C = alpha * C0 + Chinge

        if len(ytest):
            TBR1 = np.mean(ytest == np.argmax(ypred, 1) + 1)
            TBR.append(TBR1)

    results['ypred'] = np.argmax(ypred, 1) + 1
    if len(ytest):
        results['TBR'] = TBR

    results['clf'] = g
    results['G'] = G
    results['fcost'] = sav_fcost
    results['totalcost'] = sav_totalcost
    return g, results