import numpy as np
import ot


def incremental_bary_map_emd(xs, xt, a, b, m1, m2, k):
    '''
    Compute the incomplete minibatch barycenter mapping
      between a source and a target distribution
      (faster for small batch sizes).

    Parameters
    ----------
    - xs : ndarray(ns, d)
        source data
    - xt : ndarray(nt, d)
        target data
    - a : ndarray(ns)
        source distribution weights
    - b : ndarray(nt)
        target distribution weights
    - m1 : int
        source batch size
    - m2 : int
        target batch size
    - k : int
        number of sampled mini-batch pairs

    Returns
    -------
    - new_xs : ndarray(ns, d)
        Transported source measure
    - new_xt : ndarray(nt, d)
        Transported target measure
    '''
    new_xs = np.zeros(xs.shape)
    new_xt = np.zeros(xt.shape)
    Ns = np.shape(xs)[0]
    Nt = np.shape(xt)[0]

    if m1 < 101:  # small batches: use the dedicated small-batch sampler
        for i in range(k):
            # Draw a source and a target mini-batch
            sub_xs, sub_weights_a, id_a = small_mini_batch(xs, a, m1, Ns)
            sub_xt, sub_weights_b, id_b = small_mini_batch(xt, b, m2, Nt)

            sub_M = ot.dist(sub_xs, sub_xt, "sqeuclidean").copy()
            G0 = ot.emd(sub_weights_a, sub_weights_b, sub_M)

            new_xs[id_a] += G0.dot(xt[id_b])
            new_xt[id_b] += G0.T.dot(xs[id_a])

    else:
        for i in range(k):
            # Draw a source and a target mini-batch
            sub_xs, sub_weights_a, id_a = mini_batch(xs, a, m1, Ns)
            sub_xt, sub_weights_b, id_b = mini_batch(xt, b, m2, Nt)

            sub_M = ot.dist(sub_xs, sub_xt, "sqeuclidean").copy()
            G0 = ot.emd(sub_weights_a, sub_weights_b, sub_M)

            new_xs[id_a] += G0.dot(xt[id_b])
            new_xt[id_b] += G0.T.dot(xs[id_a])

    return 1. / k * Ns * new_xs, 1. / k * Nt * new_xt
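A minimal usage sketch; the original small_mini_batch/mini_batch helpers are not shown here, so the sampler below is a hypothetical stand-in that draws m indices and renormalizes the selected weights:

import numpy as np
import ot

def small_mini_batch(x, w, m, n):
    # hypothetical stand-in for the missing helper: sample m indices
    # without replacement and renormalize their weights to sum to one
    idx = np.random.choice(n, size=m, replace=False)
    return x[idx], w[idx] / w[idx].sum(), idx

xs = np.random.randn(200, 2)
xt = np.random.randn(300, 2) + 3.0
a = np.ones(200) / 200
b = np.ones(300) / 300
new_xs, new_xt = incremental_bary_map_emd(xs, xt, a, b, m1=50, m2=60, k=20)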
Example #2
def _compute_copula_ot_dependence(empirical: np.array, target: np.array,
                                  forget: np.array, n_obs: int) -> float:
    """
    Calculates optimal copula transport dependence measure.

    :param empirical: (np.array) Empirical copula.
    :param target: (np.array) Target copula.
    :param forget: (np.array) Forget copula.
    :param n_obs: (int) Number of observations.
    :return: (float) Optimal copula transport dependence.
    """

    # Uniform distribution on samples
    t_measure, f_measure, e_measure = (np.ones(
        (n_obs, )) / n_obs, np.ones((n_obs, )) / n_obs, np.ones(
            (n_obs, )) / n_obs)

    # Compute the ground distance matrix between locations
    gdist_e2t = ot.dist(empirical, target)
    gdist_e2f = ot.dist(empirical, forget)

    # Compute the optimal transport matrix
    e2t_ot = ot.emd(t_measure, e_measure, gdist_e2t)
    e2f_ot = ot.emd(f_measure, e_measure, gdist_e2f)

    # Compute the optimal transport distance:
    # <optimal transport matrix, ground distance matrix>_F
    e2t_dist = np.trace(np.dot(np.transpose(e2t_ot), gdist_e2t))
    e2f_dist = np.trace(np.dot(np.transpose(e2f_ot), gdist_e2f))

    # Compute the copula ot dependence measure
    ot_measure = 1 - e2t_dist / (e2f_dist + e2t_dist)

    return ot_measure
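A quick synthetic check, as a sketch only: pseudo-observations close to the comonotone target copula should push the measure toward 1, while samples resembling the independence (forget) copula pull it toward 0. The three copulas below are illustrative constructions, not from the original module:

import numpy as np
import ot

n_obs = 500
u = np.random.rand(n_obs)
empirical = np.clip(np.column_stack([u, u + 0.02 * np.random.randn(n_obs)]), 0, 1)
target = np.column_stack([u, u])    # comonotone (perfect positive dependence)
forget = np.random.rand(n_obs, 2)   # independence copula
print(_compute_copula_ot_dependence(empirical, target, forget, n_obs))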
Example #3
File: test_ot.py Project: eddardd/POT
def test_emd_emd2_devices_tf():
    if not tf:
        return
    nx = ot.backend.TensorflowBackend()

    n_samples = 100
    n_features = 2
    rng = np.random.RandomState(0)
    x = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples, n_features)
    a = ot.utils.unif(n_samples)
    M = ot.dist(x, y)

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        ab, Mb = nx.from_numpy(a, M)
        Gb = ot.emd(ab, ab, Mb)
        w = ot.emd2(ab, ab, Mb)
        nx.assert_same_dtype_device(Mb, Gb)
        nx.assert_same_dtype_device(Mb, w)

    if len(tf.config.list_physical_devices('GPU')) > 0:
        # Check that everything happens on the GPU
        ab, Mb = nx.from_numpy(a, M)
        Gb = ot.emd(ab, ab, Mb)
        w = ot.emd2(ab, ab, Mb)
        nx.assert_same_dtype_device(Mb, Gb)
        nx.assert_same_dtype_device(Mb, w)
        assert nx.dtype_device(Gb)[1].startswith("GPU")
Example #4
    def get_lang_mapping(self, lang1, lang2, metric, entreg):
        path = self._get_shortest_path_from_lang1_to_lang2(lang1, lang2)
        print(path)

        mapping = None
        for i in range(len(path)):
            a = self.lang_dict[path[i][0]]
            a = self.project_into_lang_space(a, self.args.lang_space)
            b = self.lang_dict[path[i][0]].child[path[i][1]]
            b = self.project_into_lang_space(b, self.args.lang_space)

            if path[i][2]:
                plan = ot.emd(b.freq, a.freq, build_MXY(b.projected_matrix, a.projected_matrix))
            else:
                plan = ot.emd(a.freq, b.freq, build_MXY(a.projected_matrix, b.projected_matrix))

            if mapping is None:
                mapping = plan
            else:
                mapping = np.matmul(mapping, plan)

            a.projected_matrix = None
            b.projected_matrix = None

        return mapping
Example #5
File: test_ot.py Project: eddardd/POT
def test_warnings():
    n = 100  # nb bins
    m = 100  # nb bins

    mean1 = 30
    mean2 = 50

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1)))**(1. / 2)

    print('Computing {} EMD '.format(1))
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        print('Computing {} EMD '.format(1))
        ot.emd(a, b, M, numItermax=1)
        assert "numItermax" in str(w[-1].message)
Example #6
    def forward_seq(self, x_train, x_test):
        N = self.Nmasses
        (Pl_train, P_train) = particleApproximation_v0(x_train, N)
        (Pl_test, P_test) = particleApproximation_v0(x_test, N)

        Pl_tem = 0
        for a in range(2):  # NOTE: hardcoded to 2; presumably meant x_train.shape[0]
            t = Pl_train[a]
            Pl_tem = Pl_tem + t
        Pl_tem = Pl_tem / 2  #x_train.shape[0]
        P_tem = np.ones((N, )) / float(N)

        #Pl_tem_vec=np.reshape(Pl_tem,(Pl_tem.shape[0]*Pl_tem.shape[1],),order='F')

        V = list()
        M = x_train.shape[0]
        for ind in range(M):
            Ni = Pl_train[ind].shape[0]
            C = ot.dist(Pl_train[ind], Pl_tem)
            b = P_tem  # b=np.ones((N,))/float(N)
            a = P_train[ind]  # a=np.ones((Ni,))/float(Ni)
            p = ot.emd(a, b, C)  # exact linear program

            #V.append(np.matmul((N*p).T,Pl_train[ind])-Pl_tem)
            V.append(np.matmul((N * p).T, Pl_train[ind]) +
                     Pl_tem)  # already giving transport displacement?

        V = np.asarray(V)

        x_train_hat = np.zeros((len(V), V[0].shape[0] * V[0].shape[1]))
        for a in range(len(V)):
            x_train_hat[a, :] = np.reshape(V[a],
                                           (V[0].shape[0] * V[0].shape[1], ),
                                           order='F')

        V = list()
        M = x_test.shape[0]
        for ind in range(M):
            Ni = Pl_test[ind].shape[0]
            C = ot.dist(Pl_test[ind], Pl_tem)
            b = P_tem  # b=np.ones((N,))/float(N)
            a = P_test[ind]  # a=np.ones((Ni,))/float(Ni)
            p = ot.emd(a, b, C)  # exact linear program

            #V.append(np.matmul((N*p).T,Pl_test[ind])-Pl_tem)
            V.append(np.matmul((N * p).T, Pl_test[ind]) + Pl_tem)

        V = np.asarray(V)

        x_test_hat = np.zeros((len(V), V[0].shape[0] * V[0].shape[1]))
        for a in range(len(V)):
            x_test_hat[a, :] = np.reshape(V[a],
                                          (V[0].shape[0] * V[0].shape[1], ),
                                          order='F')

        return x_train_hat, x_test_hat, Pl_tem, P_tem
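A note on the commented-out alternative above: with uniform template weights 1/N, the barycentric projection of the plan p onto the template support is (N * p).T @ Pl_train[ind], of shape (N, d). Subtracting Pl_tem from it yields the displacement field used in linear-OT embeddings, which is what the commented-out line computes; the active line adds the template instead, hence the inline question about whether the transport displacement was already obtained.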
def computeTransportLaplacianSymmetric_fw(distances, Ss, St, xs, xt, reg=1e-9, regls=0, reglt=0, solver=None,
                                          nbitermax=400, thr_stop=1e-8, step='opt', **kwargs):
    distribS = np.ones((xs.shape[0],)) / xs.shape[0]
    distribT = np.ones((xt.shape[0],)) / xt.shape[0]

    Ls = get_laplacian(Ss)
    Lt = get_laplacian(St)

    loop = True

    transp = ot.emd(distribS, distribT, distances)

    niter = 0
    while loop:

        old_transp = transp.copy()

        G = np.asarray(regls * get_gradient1(Ls, xt, old_transp) + reglt * get_gradient2(Lt, xs, old_transp))

        transp0 = ot.emd(distribS, distribT, distances + G)

        E = transp0 - old_transp
        # Ge=get_gradient(E,K)

        if step == 'opt':
            # optimal step size: exact line search (see the note after this function)
            tau = max(0, min(1, (-np.sum(E * distances) - np.sum(E * G)) / (
                        2 * regls * quadloss1(E, Ls, xt) + 2 * reglt * quadloss2(E, Lt, xs))))
        else:
            # other step size just in case
            tau = 2. / (niter + 2)  # print "tau:",tau

        transp = old_transp + tau * E

        # print "loss:",np.sum(transp*distances)+quadloss(transp,K)/2

        if niter >= nbitermax:
            loop = False

        err = np.sum(np.abs(transp - old_transp))

        if err < thr_stop:
            loop = False
        # print niter

        niter += 1

        if niter % 1000 == 0:
            print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
            print('{:5d}|{:8e}|'.format(niter, err))

    # print "loss:",np.sum(transp*distances)+quadloss(transp,K)/2

    return transp
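A note on the 'opt' branch above, assuming quadloss1/quadloss2 are the quadratic forms associated with the two Laplacian regularizers (whose gradients get_gradient1/get_gradient2 return): along the Frank-Wolfe segment T_old + tau * E, the objective <T, distances> + regls * quadloss1(T, Ls, xt) + reglt * quadloss2(T, Lt, xs) is quadratic in tau, and zeroing its derivative gives tau* = -(<E, distances> + <E, G>) / (2 * (regls * quadloss1(E, Ls, xt) + reglt * quadloss2(E, Lt, xs))), which is exactly the expression clipped to [0, 1] in the code.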
Example #8
def sinkhorn_mapping(set_1, set_2):
    """http://pot.readthedocs.io/en/stable/auto_examples/plot_OT_2D_samples.html"""
    a, b = np.ones((len(set_1), )) / len(set_1), np.ones(
        (len(set_2), )) / len(set_2)
    arr_1 = _generate_arr(set_1)
    arr_2 = _generate_arr(set_2)
    switch_to_cartesian(arr_1, 80 * 400)
    switch_to_cartesian(arr_2, 80 * 400)
    M = ot.dist(arr_1, arr_2)
    G0 = ot.emd(a, b, M)  # exact EMD plan (despite the function's name, no Sinkhorn here)
    counter = 0
    for i in G0:
        inner_counter = 0
        for j in i:
            if j > 0.003:
                print(arr_1[counter], arr_2[inner_counter], j, sep=", ")
            inner_counter += 1
        counter += 1
    pl.figure(4)
    for i in range(arr_1.shape[0]):
        for j in range(arr_2.shape[0]):
            if G0[i, j] > 0.003:
                pl.plot([arr_1[i, 0], arr_2[j, 0]], [arr_1[i, 1], arr_2[j, 1]])
    pl.plot(arr_1[:, 0], arr_1[:, 1], '+b', label='Source samples')
    pl.plot(arr_2[:, 0], arr_2[:, 1], 'xr', label='Target samples')
    pl.show()
Example #9
def test(source_samples,
         target_samples,
         weight_function):
    """
    :param source_samples: array (n_source, feature)
    :param target_samples: array (n_target, feature)
    :param weight_function: function determining the distance between two samples
    :return:
    """

    assert source_samples.shape[1] == target_samples.shape[1]

    # Employ uniform distribution over all data as empirical distribution (not a histogram)
    source_dist = np.ones((len(source_samples), )) / len(source_samples)
    target_dist = np.ones((len(target_samples), )) / len(target_samples)
    # print('source:', source_dist.shape, np.sum(source_dist))
    # print('target:', target_dist.shape, np.sum(target_dist))

    # build cost matrix (n_source, n_target)
    cost_matrix = np.array([[float(weight_function(__i, __o)) for __i in target_samples] for __o in source_samples])
    print('cost :\n', cost_matrix, cost_matrix.shape)
    # derive optimal transport based on network simplex algorithm
    # Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011, December).
    # Displacement interpolation using Lagrangian mass transport.
    # In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 158). ACM.
    optimal_transport = ot.emd(a=source_dist, b=target_dist, M=cost_matrix)
    return optimal_transport
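When the weight function is a standard metric, the doubly nested comprehension above can be replaced by a vectorized cost matrix. A sketch using POT's built-in squared Euclidean cost, reusing the names from the function above:

import ot

cost_matrix = ot.dist(source_samples, target_samples, metric='sqeuclidean')
optimal_transport = ot.emd(a=source_dist, b=target_dist, M=cost_matrix)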
Example #10
File: core.py Project: kiri93/graco
import numpy as np
import ot
from scipy.spatial.distance import cdist


def emd(xs, xt, metric='euclidean', numItermax=2**17, **kwargs):

    if len(xs.shape) == 1:
        xs = xs.reshape(-1, 1)
    if len(xt.shape) == 1:
        xt = xt.reshape(-1, 1)

    M = cdist(xs, xt, metric)

    if (M == 0).all():
        return 0.

    M2 = M**2

    a = np.ones(len(xs)) / len(xs)
    b = np.ones(len(xt)) / len(xt)

    # ot.emd reports hitting numItermax with a UserWarning, so this retry loop
    # only triggers if warnings are promoted to errors, e.g. via
    # warnings.simplefilter('error', UserWarning)
    for i in range(3):
        try:
            F = ot.emd(a, b, M2, numItermax, **kwargs)
            # np.isclose(np.sum(F), 1)
            return np.sum(M * F)
        except UserWarning:
            numItermax = 2 * numItermax
    assert False, "No convergence reached. Try to increase numItermax!"
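A short usage sketch of this wrapper; the warnings promotion is an assumption about how it is meant to be run, matching the comment above:

import warnings
import numpy as np

warnings.simplefilter('error', UserWarning)
xs = np.random.randn(50)
xt = np.random.randn(80) + 1.0
print(emd(xs, xt))  # 1-D inputs are reshaped to (n, 1) internally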
Example #11
def compute_ot_loss_matrix(y: np.ndarray,
                           y_hat: np.ndarray,
                           D: np.ndarray,
                           ot_niters=10**5):
    """
    Solve the optimal transport problem for the image pixels, and return the OT
    permutation matrix Pi.
    :param y: the ground-truth image.
    :param y_hat: the predicted image.
    :param D: the distance matrix; generate via make_distance_matrix(y.shape[0])
    :param ot_niters: maximum number of iterations for the ot.emd solver.
    :return: Pi, the optimal transport matrix, of shape [d**2, d**2]. The (i,j) entry
    in Pi is the mass moved from pixel i in y_hat to pixel j in y.
    """
    assert_array_finite(y)
    assert_array_finite(y_hat)
    assert_array_nonnegative(y)
    assert_array_nonnegative(y_hat)

    np.testing.assert_array_equal(y.shape[0],
                                  y.shape[1])  # check images are square
    np.testing.assert_array_equal(y.shape,
                                  y_hat.shape)  # check images same size
    y_hist = normalize_to_histogram(y)
    y_hat_hist = normalize_to_histogram(y_hat)
    PI = ot.emd(y_hat_hist, y_hist, D, numItermax=ot_niters)
    return PI
Example #12
def diagonality(ir_dft, ir_dftb):
    #normalize spectra
    ir_dft = [i / np.sum(i) for i in ir_dft]
    ir_dftb = [i / np.sum(i) for i in ir_dftb]

    #diagonality of P https://math.stackexchange.com/questions/1392491/measure-of-how-much-diagonal-a-matrix-is
    Y, X = np.meshgrid(np.linspace(0, 1, ir_dft[0].size),
                       np.linspace(0, 1, ir_dft[0].size))
    C = abs(Y - X)**2

    def dist(P):
        j = np.ones(P.shape[0])
        r = np.arange(P.shape[0])
        r2 = r**2

        n = j @ P @ j.T
        sum_x = r @ P @ j.T
        sum_y = j @ P @ r.T
        sum_x2 = r2 @ P @ j.T
        sum_y2 = j @ P @ r2.T
        sum_xy = r @ P @ r.T

        return (n * sum_xy - sum_x * sum_y) / (np.sqrt(n * sum_x2 - sum_x**2) *
                                               np.sqrt(n * sum_y2 - sum_y**2))

    # print('Case (Diagonality)')
    d = np.zeros((len(ir_dft), len(ir_dftb)))
    for i, a in enumerate(ir_dft):
        for j, b in enumerate(ir_dftb):
            # P = sink.sinkhorn(a,b, 0.003).P
            P = ot.emd(a, b, C)
            d[i, j] = dist(P)

    return d
Example #13
def match_spots_using_spatial_heuristic(X,
                                        Y,
                                        use_ot: bool = True) -> np.ndarray:
    """
    Calculates and returns a mapping of spots using a spatial heuristic.

    Args:
        X (array-like): Coordinates for spots X.
        Y (array-like): Coordinates for spots Y.
        use_ot: If ``True``, use optimal transport ``ot.emd()`` to calculate mapping. Otherwise, use Scipy's ``min_weight_full_bipartite_matching()`` algorithm.
    
    Returns:
        Mapping of spots using a spatial heuristic.
    """
    n1, n2 = len(X), len(Y)
    X, Y = norm_and_center_coordinates(X), norm_and_center_coordinates(Y)
    dist = scipy.spatial.distance_matrix(X, Y)
    if use_ot:
        pi = ot.emd(np.ones(n1) / n1, np.ones(n2) / n2, dist)
    else:
        row_ind, col_ind = scipy.sparse.csgraph.min_weight_full_bipartite_matching(
            scipy.sparse.csr_matrix(dist))
        pi = np.zeros((n1, n2))
        pi[row_ind, col_ind] = 1 / max(n1, n2)
        if n1 < n2:
            pi[:, [(j not in col_ind) for j in range(n2)]] = 1 / (n1 * n2)
        elif n2 < n1:
            pi[[(i not in row_ind) for i in range(n1)], :] = 1 / (n1 * n2)
    return pi
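A usage sketch with random coordinates; norm_and_center_coordinates, scipy and ot are assumed to be imported in the original module. Under uniform weights, the returned pi is a valid coupling whose entries sum to 1:

import numpy as np

X = np.random.rand(100, 2)
Y = np.random.rand(120, 2)
pi = match_spots_using_spatial_heuristic(X, Y, use_ot=True)
print(pi.shape, pi.sum())  # (100, 120), ~1.0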
Example #14
def OT_emd(Xl, Xr):
    # loss matrix
    C = ot.dist(Xl, Xr)
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    a, b = np.ones((n, )) / n, np.ones((m, )) / m

    G0 = ot.emd(a, b, M)

    plt.figure(3)
    plt.imshow(G0, interpolation='nearest')
    plt.title('OT matrix G0')

    plt.figure(4)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], G0, c=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix with samples')

    return (G0)
Example #15
def OT_scores_emd(Xl, Xr, scoresl, scoresr, mu=0.5):
    # loss matrix
    print(scoresl)
    C = ot.dist(Xl, Xr) + mu * ot.dist(np.expand_dims(scoresl, axis=1),
                                       np.expand_dims(scoresr, axis=1))
    M = C / C.max()

    n = len(Xl)
    m = len(Xr)
    print(n, m)

    a, b = scoresl, scoresr  # note: ot.emd expects a and b to sum to the same total mass

    G0 = ot.emd(a, b, M)

    plt.figure(3)
    plt.imshow(G0, interpolation='nearest')
    plt.title('OT matrix G0')

    plt.figure(4)
    ot.plot.plot2D_samples_mat(Xl[:, :2], Xr[:, :2], G0, c=[.5, .5, 1])
    plt.plot(Xl[:, 0], Xl[:, 1], '+b', label='Source samples')
    plt.plot(Xr[:, 0], Xr[:, 1], 'xr', label='Target samples')
    plt.legend(loc=0)
    plt.title('OT matrix with samples')

    return (G0)
def get_node_cell_type_intersection(tree):
    # find the amount of intersection each node has with each cell type
    labels, suppl_image_coords = get_image_coords(supplementary_data)
    layers = list(get_layers(tree))
    cost_matrices, costs = [], []
    for layer_ind, layer in enumerate(layers):
        if len(layer) == 0:
            continue
        print('Layer', layer_ind, 'number of nodes', len(layer))
        node_prop = np.array([node.coords.shape[0] for node in layer])
        node_prop = node_prop / node_prop.sum()
        suppl_prop = np.array(
            [image_coords.shape[0] for image_coords in suppl_image_coords])
        suppl_prop = suppl_prop / suppl_prop.sum()
        cost_matrix = get_intersection(layer, suppl_image_coords)
        transport_matrix, log = ot.emd(node_prop,
                                       suppl_prop,
                                       1 - cost_matrix,
                                       log=True)
        costs.append(log['cost'])
        cost_matrices.append(cost_matrix)
    cost_min_ind = np.argmin(costs)
    cost_matrix = cost_matrices[cost_min_ind]
    layer = layers[cost_min_ind]
    print('Optimal layer is {} with {} clusters.'.format(
        cost_min_ind, len(layer)))
    annotate_optimal_layer(labels, cost_matrix, layer)
Example #17
def weighted_barycenter_algorithm(ls, test_func, X_orig, Yi_orig, bi, lambdas, tol=1e-8, metric='euclidean', reg=1e-2, maxiter=20, bregmanmaxiter=30):
    """
    k : number of supports in X
    X_orig  : init of the barycenter (k * d)
    Yi_orig : list of distributions, sizes (k_i * d)
    bi : list of weights, sizes (k_i)
    lambdas : barycenter coefficients, one per distribution in Yi_orig
    tol : tolerance
    """
    assert(len(Yi_orig) == len(bi))
    assert(len(X_orig[0]) == len(Yi_orig[0][0]))
    
    X = X_orig
    Yi = Yi_orig
    
    displacement = 1
    niter = 0

    while (displacement > tol and niter < maxiter):
        X_prev = X
        a = compute_barycenter_weight(X, Yi, bi, lambdas, tol=tol, maxiter=bregmanmaxiter, reg=reg)
        Tsum = np.zeros(X.shape)

        for i in range(0, len(bi)): 
            M = build_MXY(X, Yi[i], metric=metric)
            #T = ot.sinkhorn(a, bi[i], M, reg)
            T = ot.emd(a, bi[i], M)
            Tsum = Tsum + lambdas[i] * np.reshape(1. / a, (-1, 1)) * np.matmul(T, Yi[i])

        displacement = np.sum(np.square(Tsum - X))

        print("~~~~epoch "+str(niter)+"~~~~")

        #i = ls.index('en')
        #for j in range(len(ls)):
        #    if i!=j and (not ls[i].isdigit()) and (not ls[j].isdigit()):
        #        mapping = ot.emd(bi[i], a, build_MXY(Yi[i], X))
        #        mapping2 = ot.emd(a, bi[j], build_MXY(X, Yi[j]))
        #        print("="*20+"begin testing mapping for "+ls[i]+" and "+ls[j]+"="*21)
        #        test_func(ls[i], ls[j], np.dot(mapping, mapping2))
        #        mapping = None
        #        mapping2 = None

        #for i in range(len(ls)):
        #    for j in range(len(ls)):
        #        if i!=j and (not ls[i].isdigit()) and (not ls[j].isdigit()):
        #            mapping = ot.emd(bi[i], a, build_MXY(Yi[i], X)) 
        #            mapping2 = ot.emd(a, bi[j], build_MXY(X, Yi[j])) 
        #            print("="*20+"begin testing mapping for "+ls[i]+" and "+ls[j]+"="*21)
        #            try:
        #                test_func(ls[i], ls[j], np.dot(mapping, mapping2))
        #            except:
        #                print("failed to eval on "+ls[i]+" and "+ls[j])
        #            mapping = None
        #            mapping2 = None


        X = Tsum
        niter += 1

    return X, a
def rbd_wasserstein_approx2(f1,f2):
    im1 = rbd_read(f1).flatten().reshape(-1,1)
    im2 = rbd_read(f2).flatten().reshape(-1,1)

    gauss1 = mix.GaussianMixture(2).set_params(tol=1e-1).fit(im1)
    gauss2 = mix.GaussianMixture(2).set_params(tol=1e-1).fit(im2)

    m11,m12 = gauss1.means_.flatten()
    v11,v12 = gauss1.covariances_.flatten()
    p11,p12 = gauss1.weights_.flatten()

    m21,m22 = gauss2.means_.flatten()
    v21,v22 = gauss2.covariances_.flatten()
    p21,p22 = gauss2.weights_.flatten()
    
    d1 = np.array([p11,p12,0,0])
    d2 = np.array([0,0,p21,p22])

    m = np.array([m11,m12,m21,m22])
    v = np.array([v11,v12,v21,v22])
    
    weight_matrix = np.zeros((len(d1),len(d2)))
    for i in range(len(d1)):
        for j in range(len(d2)):
            weight_matrix[i,j] = gauss_wasserstein(m[i],v[i],m[j],v[j])

    return(ot.emd(d1,d2,weight_matrix))
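The gauss_wasserstein helper is not shown. A plausible sketch, assuming it returns the squared 2-Wasserstein distance between two univariate Gaussians, for which the closed form is (m1 - m2)**2 + (sqrt(v1) - sqrt(v2))**2:

import numpy as np

def gauss_wasserstein(m1, v1, m2, v2):
    # closed-form squared W2 between N(m1, v1) and N(m2, v2)
    return (m1 - m2)**2 + (np.sqrt(v1) - np.sqrt(v2))**2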
    def solve_optimal_transport_problem(self):
        self.n = len(self.weights_model1)
        self.a, self.b = np.ones((self.n,)) / self.n, np.ones((self.n,)) / self.n  # uniform distribution on samples
        # loss matrix
        self.M = ot.dist(self.weights_model1, self.weights_model2)
        self.M /= self.M.max()
        self.G0 = ot.emd(self.a, self.b, self.M)
Example #20
    def compute_gamma(self, pred):
        '''
        Function to compute the OT between the target and source samples.
        :return:Gamma the OT matrix
        '''
        # Reshaping the samples into vectors of dimensions number of modalities * patch_dimension.
        # train_vecs are of shape (batch_size, d)
        train_vec_source = np.reshape(self.image_representation_source, (self.batch_size, self.image_representation_source.shape[1]*
                                                                         self.image_representation_source.shape[2]*
                                                                         self.image_representation_source.shape[3]*
                                                                         self.image_representation_source.shape[4]))
        
        train_vec_target = np.reshape(self.image_representation_target, (self.batch_size, self.image_representation_target.shape[1]*
                                                                         self.image_representation_target.shape[2]*
                                                                         self.image_representation_target.shape[3]*
                                                                         self.image_representation_target.shape[4]))
        # Same for the ground truth but the GT is the same for both modalities

        truth_vec_source = np.reshape(self.train_batch[1][:self.batch_size],
                                      (self.batch_size, self.config.patch_shape[0]*self.config.patch_shape[1]*self.config.patch_shape[2]))
        pred_vec_source = np.reshape(pred[:self.batch_size],
                                      (self.batch_size, self.config.patch_shape[0]*self.config.patch_shape[1]*self.config.patch_shape[2]))

        # We don't have information on target labels
        pred_vec_target = np.reshape(pred[self.batch_size:],
                                      (self.batch_size, self.config.patch_shape[0]*self.config.patch_shape[1]*self.config.patch_shape[2]))

        # Compute the distance between samples and between the source_truth and the target prediction.
        C0 = cdist(train_vec_source, train_vec_target, metric="sqeuclidean")
        C1 = cdist(truth_vec_source, pred_vec_target, metric=self.config.jdot_distance)
        C = K.get_value(self.jdot_alpha)*C0+K.get_value(self.jdot_beta)*C1

        # Computing gamma using the OT library
        gamma = ot.emd(ot.unif(self.batch_size), ot.unif(self.batch_size), C)
        return gamma
def gromov_wasserstein_distance_latent_space_rand_emd(data_path, num_labels,
                                                      num_clusters,
                                                      result_path, args):
    import scipy as sp
    import matplotlib.pylab as pl
    import ot
    # "L-1" means no discrimination by label: the same VAE transform is used
    # for all data; whether we then cluster on this z space or split it some
    # other way is an orthogonal choice.
    z = np.load(data_path + "/L-1" + config.z_name, allow_pickle=True)
    np.random.shuffle(z)
    results = {}
    mat = np.zeros((num_clusters, num_clusters))
    from sklearn.model_selection import KFold
    kf = KFold(n_splits=num_clusters)
    i = 0
    cluster_idx = {}
    for train_eval_idx, test_idx in kf.split(z):
        cluster_idx[str(i)] = test_idx
        i = i + 1

    i = 0
    print(z.shape)
    for i in range(num_clusters):
        xs = z[cluster_idx[str(i)]]
        print(xs.shape)
        for j in range(num_clusters):
            xt = z[cluster_idx[str(j)]]
            print(xt.shape)
            # Compute distance kernels, normalize them and then display
            n_samples = min(xs.shape[0], xt.shape[0])
            if args.debug:
                n_samples = 100
            xs = xs[:n_samples]
            xt = xt[:n_samples]
            M = sp.spatial.distance.cdist(xt, xs)
            M /= M.max()
            ds, dt = np.ones((len(xs), )) / len(xs), np.ones(
                (len(xt), )) / len(xt)
            g0, loss = ot.emd(ds, dt, M, log=True)
            print(
                'Wasserstein (EMD) distance between clusters {} and {}: cost {}, total mass {}'.
                format(i, j, str(loss["cost"]), str(np.sum(g0))))
            #results[str(i)+str(j)]={"GW":log0['gw_dist'],"EGW":log['gw_dist']}
            results[str(i) + str(j)] = loss["cost"]
            mat[i, j] = loss["cost"]

            #pl.figure(1, (10, 5))
            #pl.subplot(1, 2, 1)
            #pl.imshow(gw0, cmap='jet')
            #pl.title('Gromov Wasserstein')
            #pl.subplot(1, 2, 2)
            #pl.imshow(gw, cmap='jet')
            #pl.title('Entropic Gromov Wasserstein')
            #pl.savefig(result_path + "/WD_TSNE{}_{}.jpg".format(i,j))
    # print(results)
    print(mat)
    with open("wd_rand.txt", 'a') as lf:
        lf.write(str(results))
    return results
Example #22
File: test_ot.py Project: yg79/POT
def test_emd_1d_emd2_1d_with_weights():
    # test emd1d gives similar results as emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()

    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd(w_u, w_v, M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, )), w_u,
                                   w_v)
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(w_u, G.sum(1))
    np.testing.assert_allclose(w_v, G.sum(0))
Example #23
def wass1dim(data1, data2, numBins=200):
    ''' Compare two one-dimensional arrays by the 
    Wasserstein metric (https://en.wikipedia.org/wiki/Wasserstein_metric).
    The input data should have outliers removed.
    
    Parameters
    ----------
        data1, data2: two one-dimensional arrays to compare.
        numBins: the number of bins.
        
    Outputs
    -------
        result: the computed Wasserstein metric.
        
    '''
    upper = np.max((data1.max(), data2.max()))
    lower = np.min((data1.min(), data2.min()))
    xbins = np.linspace(lower, upper, numBins + 1)
    density1, _ = np.histogram(data1, density=False, bins=xbins)
    density2, _ = np.histogram(data2, density=False, bins=xbins)
    density1 = density1 / np.sum(density1)
    density2 = density2 / np.sum(density2)

    # pairwise distance matrix between bins
    distMat = distance_matrix(xbins[1:].reshape(numBins, 1),
                              xbins[1:].reshape(numBins, 1))
    M = distMat
    T = ot.emd(density1, density2, M)  # optimal transport matrix
    result = np.sum(T * M)  # the objective value (Wasserstein cost)
    return result
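A quick sanity check, as a sketch (numpy, ot and scipy.spatial's distance_matrix are assumed imported as the function requires): for two unit-variance Gaussian samples whose means differ by 0.5, the binned W1 estimate should come out near 0.5:

import numpy as np

rng = np.random.RandomState(0)
print(wass1dim(rng.randn(5000), rng.randn(5000) + 0.5))  # ~0.5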
Example #24
    def compute_deepjdot_loss(features_source, ys_pred, ys, features_target,
                              yt_pred, gamma_criterion, g_criterion):
        # Compute the squared Euclidean distance in the feature space
        #C0 = cdist(features_source.detach().cpu().numpy(),
        #           features_target.detach().cpu().numpy(), p=0.2)
        C0 = torch.square(torch.cdist(features_source, features_target, p=2.0))
        # Compute the loss function of labels
        #C1 = F.cross_entropy(yt_pred, ys)
        classes = torch.arange(yt_pred.shape[1]).reshape(1, yt_pred.shape[1])
        one_hot_ys = (ys.unsqueeze(1) == classes.to(device=c.device)).float()
        C1 = torch.square(
            torch.cdist(one_hot_ys, F.softmax(yt_pred, dim=1), p=2.0))
        C = c.alpha * C0 + c.tloss * C1
        # Compute the gamma function
        #gamma = ot.emd(ot.unif(features_source.shape[0]),
        #               ot.unif(features_target.shape[0]), C)
        gamma = ot.emd(
            torch.from_numpy(ot.unif(
                features_source.shape[0])).to(device=c.device),
            torch.from_numpy(ot.unif(
                features_target.shape[0])).to(device=c.device), C)
        # ot.emd solves the exact OT problem between the source and target feature distributions
        # ot.unif(n) returns the uniform histogram of length n

        # Align Loss
        gamma_loss = gamma_criterion(features_source, features_target, gamma)

        # The gamma criterion takes the source features, the target features
        # and gamma; it first computes the L2 distances between the features
        # and then returns self.jdot_alpha * dnn.K.sum(self.gamma * gdist)

        # Classifier Loss
        clf_loss = g_criterion(ys, ys_pred, yt_pred, gamma)
        return clf_loss, gamma_loss, clf_loss + gamma_loss
Example #25
    def _match_shorter(self, shorter):
        """compute mapping if new points are fewer than known points"""
        M, known, _ = _dist_closest(self.points, shorter)
        G = ot.emd([], [], M[known])
        result = np.empty((len(shorter),), dtype=int)
        result[np.argmax(G, axis=1)] = known
        return result
Example #26
def persistence_wasserstein_distance(x: np.ndarray, y: np.ndarray,
                                     ground_distance: np.ndarray) -> float:
    """Compute an approximation of Persistence Wasserstein_1 distance
    between persistenced iagrams with vector representations ``x`` and ``y``
    using the ground distance provided.

    Parameters
    ----------
    x: array of shape (n_gaussians,)
        The vectorization of the first persistence diagram

    y: array of shape (n_gaussians,)
        The vectorization of the second persistence diagram

    ground_distance: array of shape (n_gaussians + 1, n_gaussians + 1)
        The amended ground-distance as output by ``add_birth_death_line``

    Returns
    -------
    dist: float
        An approximation of the Persistence Wasserstein_1 distance
        between persistence diagrams with vector representations
        ``x`` and ``y``
    """
    x_a = np.append(x, y.sum())
    x_a /= x_a.sum()
    y_a = np.append(y, x.sum())
    y_a /= y_a.sum()
    plan = ot.emd(x_a, y_a, ground_distance)
    return (x.sum() + y.sum()) * (plan * ground_distance).sum()
Example #27
    def graph_d(self, graph1, graph2):
        """ Compute the Wasserstein distance between two graphs. Uniform weights are used.        
        Parameters
        ----------
        graph1 : a Graph object
        graph2 : a Graph object
        Returns
        -------
        The Wasserstein distance between the features of graph1 and graph2
        """

        nodes1 = graph1.nodes()
        nodes2 = graph2.nodes()
        t1masses = np.ones(len(nodes1)) / len(nodes1)
        t2masses = np.ones(len(nodes2)) / len(nodes2)
        x1 = self.reshaper(graph1.all_matrix_attr())
        x2 = self.reshaper(graph2.all_matrix_attr())

        if self.features_metric == 'dirac':
            f = lambda x, y: x != y
            M = ot.dist(x1, x2, metric=f)
        else:
            M = ot.dist(x1, x2, metric=self.features_metric)
        if np.max(M) != 0:
            M = M / np.max(M)
        self.M = M

        transp = ot.emd(t1masses, t2masses, M)
        self.transp = transp

        return np.sum(transp * M)
Example #28
def pot_wasserstein_mapper(net1, net2, metric_space, p=None, q=None):
    """ Computes vanilla EMD (over Hausdorff dist) for mapper graphs

    Parameters
    ----------
    net1 : lightweight_mapper.Network
        Mapper graph
    net2 : lightweight_mapper.Network
        Mapper graph
    metric_space : np.array
        Pairwise distance matrix
    p : np.array - nx1
        Distribution over nodes corresponding to net1
    q : np.array - nx1
        Distribution over nodes corresponding to net2

    Returns
    -------
    EMD (Cost = Hausdorff dist)
    """
    C3 = network_merge_distance(net1, net2, metric_space)
    if p is None or q is None:
        p = np.diag(net1.adjacency_matrix.toarray())
        p = p / p.sum()

        q = np.diag(net2.adjacency_matrix.toarray())
        q = q / q.sum()

    gw_dist = ot.emd2(p, q, C3)
    params = ot.emd(p, q, C3)
    return gw_dist, params
Example #29
    def makeTransportPlan(self):
        if self.source_data and self.target_data:
            if self.source_data_size == self.target_data_size:
                loss_matrix = ot.dist(self.source_data, self.target_data)
                loss_matrix = loss_matrix / loss_matrix.max()
                if not self.source_weight:
                    self.source_weight = np.ones(
                        (self.source_data_size, )) / self.source_data_size
                    print(
                        "The source weights are initialized to uniform. For custom weights, please load them first."
                    )
                if not self.target_weight:
                    self.target_weight = np.ones(
                        (self.target_data_size, )) / self.target_data_size
                    print(
                        "The target weights are initialized to uniform. For custom weights, please load them first."
                    )
                transport = ot.emd(self.source_weight,
                                   self.target_weight,
                                   loss_matrix,
                                   log=True)
                print("Transport Plan Complete")
                print("The cost is: {}".format(transport[1]['cost']))
                self.transport_plan = transport[0]
                return transport
            else:
                print(
                    "Optimal Transport Plan not completed: mismatch between Source and Target sizes."
                )
                return
        else:
            print(
                "Optimal Transport Plan not completed. Please add Source & Target data and rerun."
            )
            return
Example #30
def gnpr_distance(x: np.array,
                  y: np.array,
                  theta: float,
                  n_bins: int = 50) -> float:
    """
    Calculates the empirical distance between two random variables under the Generic Non-Parametric Representation
    (GNPR) approach.

    Formula for the distance is taken from https://www.researchgate.net/publication/322714557 (p.72).

    Parameter theta defines what type of information dependency is being tested:
    - for theta = 0 the distribution information is tested
    - for theta = 1 the dependence information is tested
    - for theta = 0.5 a mix of both information types is tested

    With theta in [0, 1] the distance lies in the range [0, 1] and is a metric.
    (See original work for proof, p.71)

    This method is modified as it uses 1D Optimal Transport Distance to measure
    distribution distance. This solves the issue of defining support and choosing
    a number of bins. The number of bins can be given as an input to speed up calculations.
    Big numbers of bins can take a long time to calculate.

    :param x: (np.array/pd.Series) X vector.
    :param y: (np.array/pd.Series) Y vector (same number of observations as X).
    :param theta: (float) Type of information being tested. Falls in range [0, 1].
    :param n_bins: (int) Number of bins to use to split the X and Y vector observations.
        (50 by default)
    :return: (float) Distance under GNPR approach.
    """

    # Number of observations
    num_obs = x.shape[0]

    # Calculating the d_1 distance
    dist_1 = 3 / (num_obs * (num_obs**2 - 1)) * (np.power(x - y, 2).sum())

    # Binning observations
    x_binned = pd.Series(np.histogram(x, bins=n_bins)[0]) / num_obs
    y_binned = pd.Series(np.histogram(y, bins=n_bins)[0]) / num_obs

    # Bin positions
    bins = np.linspace(0, 1, n_bins)

    # Loss matrix
    loss_matrix = ot.dist(bins.reshape((n_bins, 1)), bins.reshape((n_bins, 1)))
    loss_matrix /= loss_matrix.max()

    # Optimal transportation matrix
    ot_matrix = ot.emd(x_binned.sort_values(), y_binned.sort_values(),
                       loss_matrix)

    # Optimal transport distance
    dist_0 = np.trace(np.dot(np.transpose(ot_matrix), loss_matrix))

    # Calculating the GNPR distance
    distance = theta * dist_1 + (1 - theta) * dist_0

    return distance**(1 / 2)
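A usage sketch, assuming rank-transformed inputs: the 3 / (num_obs * (num_obs**2 - 1)) normalization of dist_1 keeps it in [0, 1] exactly when x and y are ranks 1..n, as in Spearman-type statistics. This driver is illustrative, not from the original module:

import numpy as np
import pandas as pd
import ot
from scipy.stats import rankdata

rng = np.random.RandomState(0)
x = rng.randn(1000)
y = 0.6 * x + 0.8 * rng.randn(1000)
print(gnpr_distance(rankdata(x), rankdata(y), theta=0.5))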
Example #31
File: test_ot.py Project: HelenLiGit/POT
def test_dual_variables():
    n = 5000  # nb bins
    m = 6000  # nb bins

    mean1 = 1000
    mean2 = 1100

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)

    print('Computing {} EMD '.format(1))

    # emd loss 1 proc
    ot.tic()
    G, log = ot.emd(a, b, M, log=True)
    ot.toc('1 proc : {} s')

    ot.tic()
    G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
    ot.toc('1 proc : {} s')

    cost1 = (G * M).sum()
    # Check symmetry
    np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
    # Check with closed-form solution for gaussians
    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))

    # Check that both cost computations are equivalent
    np.testing.assert_almost_equal(cost1, log['cost'])
    check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
def solve(fake_feature, true_feature):
    # get the optimal matching between fake and true; assumes #fake <= #true
    M = distance(fake_feature, true_feature, True)
    emd = ot.emd([], [], M.numpy())  # empty weight lists mean uniform marginals

    mapping = np.zeros(fake_feature.size(0))

    for i in range(0, fake_feature.size(0)):
        for j in range(0, true_feature.size(0)):
            if emd[i][j] > 0:
                mapping[i] = j
    return mapping
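A vectorized sketch of the same read-out. It is not byte-for-byte equivalent: where a row of the plan has several positive entries, it keeps the column carrying the most mass rather than the last positive one:

mapping = emd.argmax(axis=1).astype(float)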
Example #33
File: jdot.py Project: HelenLiGit/JDOT
def jdot_krr(X,y,Xtest,gamma_g=1, numIterBCD = 10, alpha=1,lambd=1e1, 
             method='emd',reg=1,ktype='linear'):
    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa=np.ones((n,))/n
    wb=np.ones((ntest,))/ntest

    # original loss
    C0=cdist(X,Xtest,metric='sqeuclidean')
    #print np.max(C0)
    C0=C0/np.median(C0)

    # classifier    
    g = classif.KRRClassifier(lambd)

    # compute kernels
    if ktype=='rbf':
        Kt=sklearn.metrics.pairwise.rbf_kernel(Xtest,Xtest,gamma=gamma_g)
    else:
        Kt=sklearn.metrics.pairwise.linear_kernel(Xtest,Xtest)

    C = alpha*C0#+ cdist(y,ypred,metric='sqeuclidean')
    k=0
    while (k<numIterBCD):# and not changeLabels:
        k=k+1
        if method=='sinkhorn':
            G = ot.sinkhorn(wa,wb,C,reg)
        if method=='emd':
            G=  ot.emd(wa,wb,C)

        Yst=ntest*G.T.dot(y)

        g.fit(Kt,Yst)
        ypred=g.predict(Kt)
       
        # function cost
        fcost = cdist(y,ypred,metric='sqeuclidean')

        C=alpha*C0+fcost
            
    return g,np.sum(G*(fcost))    
Example #34
File: test_ot.py Project: HelenLiGit/POT
def test_emd_empty():
    # test emd and emd2 for simple identity
    n = 100
    rng = np.random.RandomState(0)

    x = rng.randn(n, 2)
    u = ot.utils.unif(n)

    M = ot.dist(x, x)

    G = ot.emd([], [], M)

    # check G is identity
    np.testing.assert_allclose(G, np.eye(n) / n)
    # check constraints
    np.testing.assert_allclose(u, G.sum(1))  # cf convergence sinkhorn
    np.testing.assert_allclose(u, G.sum(0))  # cf convergence sinkhorn

    w = ot.emd2([], [], M)
    # check loss=0
    np.testing.assert_allclose(w, 0)
Example #35
File: test_ot.py Project: HelenLiGit/POT
def test_warnings():
    n = 100  # nb bins
    m = 100  # nb bins

    mean1 = 30
    mean2 = 50

    # bin positions
    x = np.arange(n, dtype=np.float64)
    y = np.arange(m, dtype=np.float64)

    # Gaussian distributions
    a = gauss(n, m=mean1, s=5)  # m= mean, s= std

    b = gauss(m, m=mean2, s=10)

    # loss matrix
    M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)

    print('Computing {} EMD '.format(1))
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        print('Computing {} EMD '.format(1))
        ot.emd(a, b, M, numItermax=1)
        assert "numItermax" in str(w[-1].message)
        assert len(w) == 1
        a[0] = 100
        print('Computing {} EMD '.format(2))
        ot.emd(a, b, M)
        assert "infeasible" in str(w[-1].message)
        assert len(w) == 2
        a[0] = -1
        print('Computing {} EMD '.format(2))
        ot.emd(a, b, M)
        assert "infeasible" in str(w[-1].message)
        assert len(w) == 3
Example #36
pl.xlabel('x')

pl.ylabel('y')
pl.legend()
pl.title('Toy regression example')

#%% TLOT

itermax=5
alpha=1
C0=cdist(xs,xt,metric='sqeuclidean')
#print np.max(C0)
C0=C0/np.median(C0)
fcost = cdist(ys,yt,metric='sqeuclidean')
C=alpha*C0+fcost
G0=ot.emd(ot.unif(n),ot.unif(n),C)

fit_params={'epochs':100}

model,loss = jdot.jdot_nn_l2(get_model,xs,ys,xt,ytest=yt,fit_params=fit_params,numIterBCD = itermax, alpha=alpha)

ypred=model.predict(xvisu.reshape((-1,1)))


pl.figure(2)
pl.clf()
pl.scatter(xs,ys,label='Source samples',edgecolors='k')
pl.scatter(xt,yt,label='Target samples',edgecolors='k')
pl.plot(xvisu,fs_s(xvisu),'b',label='Source model')
pl.plot(xvisu,fs_t(xvisu),'g',label='Target model')
pl.plot(xvisu,ypred,'r',label='JDOT model')
Example #37
File: jdot.py Project: HelenLiGit/JDOT
def jdot_nn_l2(get_model,X,Y,Xtest,ytest=[],fit_params={},reset_model=True, numIterBCD = 10, alpha=1,method='emd',reg=1,nb_epoch=100,batch_size=10):
    # get model should return a new model compiled with l2 loss
    
    
    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa=np.ones((n,))/n
    wb=np.ones((ntest,))/ntest

    # original loss
    C0=cdist(X,Xtest,metric='sqeuclidean')
    C0=C0/np.max(C0)

    # classifier    
    g = get_model()
        
    TBR = []
    sav_fcost = []
    sav_totalcost = []

    results = {}

    #Init initial g(.)
    g.fit(X,Y,**fit_params)
    ypred=g.predict(Xtest)

    C = alpha*C0+ cdist(Y,ypred,metric='sqeuclidean')

    # do it only if the final labels were given
    if len(ytest):
        ydec=np.argmax(ypred,1)+1
        TBR1=np.mean(ytest==ydec)
        TBR.append(TBR1)

    k=0
    changeLabels=False
    while (k<numIterBCD):# and not changeLabels:
        k=k+1
        if method=='sinkhorn':
            G = ot.sinkhorn(wa,wb,C,reg)
        if method=='emd':
            G=  ot.emd(wa,wb,C)

        Yst=ntest*G.T.dot(Y)
        
        if reset_model:
            g=get_model()

        g.fit(Xtest,Yst,**fit_params)
        ypred=g.predict(Xtest)
        
        # function cost
        fcost = cdist(Y,ypred,metric='sqeuclidean')
        #pl.figure()
        #pl.imshow(fcost)
        #pl.show()

        C=alpha*C0+fcost

        ydec_tmp=np.argmax(ypred,1)+1
        if k>1:
            changeLabels=np.all(ydec_tmp==ydec)
            sav_fcost.append(np.sum(G*fcost))
            sav_totalcost.append(np.sum(G*(alpha*C0+fcost)))

        ydec=ydec_tmp
        if len(ytest):
            TBR1=np.mean((ytest-ypred)**2)
            TBR.append(TBR1)
            
    results['ypred0']=ypred
    results['ypred']=np.argmax(ypred,1)+1
    if len(ytest):
        results['mse']=TBR
    results['clf']=g
    results['fcost']=sav_fcost
    results['totalcost']=sav_totalcost
    return g,results    
Example #38
pl.subplot(1, 3, 2)
pl.imshow(M2, interpolation='nearest')
pl.title('Squared Euclidean cost')

pl.subplot(1, 3, 3)
pl.imshow(Mp, interpolation='nearest')
pl.title('Sqrt Euclidean cost')
pl.tight_layout()

##############################################################################
# Dataset 1 : Plot OT Matrices
# ----------------------------


#%% EMD
G1 = ot.emd(a, b, M1)
G2 = ot.emd(a, b, M2)
Gp = ot.emd(a, b, Mp)

# OT matrices
pl.figure(3, figsize=(7, 3))

pl.subplot(1, 3, 1)
ot.plot.plot2D_samples_mat(xs, xt, G1, c=[.5, .5, 1])
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.axis('equal')
# pl.legend(loc=0)
pl.title('OT Euclidean')

pl.subplot(1, 3, 2)
Example #39
File: jdot.py Project: HelenLiGit/JDOT
def jdot_svm(X,y,Xtest,  
                      ytest=[],gamma_g=1, numIterBCD = 10, alpha=1,
                      lambd=1e1, method='emd',reg_sink=1,ktype='linear'):
    # Initializations
    n = X.shape[0]
    ntest = Xtest.shape[0]
    wa=np.ones((n,))/n
    wb=np.ones((ntest,))/ntest

    # original loss
    C0=cdist(X,Xtest,metric='sqeuclidean')

    # classifier    
    g = classif.SVMClassifier(lambd)

    # compute kernels
    if ktype=='rbf':
        Kt=sklearn.metrics.pairwise.rbf_kernel(Xtest,gamma=gamma_g)
        #Ks=sklearn.metrics.pairwise.rbf_kernel(X,gamma=gamma_g)
    else:
        Kt=sklearn.metrics.pairwise.linear_kernel(Xtest)
        #Ks=sklearn.metrics.pairwise.linear_kernel(X)
        
    TBR = []
    sav_fcost = []
    sav_totalcost = []

    results = {}
    ypred=np.zeros(y.shape)

    Chinge=np.zeros(C0.shape)
    C=alpha*C0+Chinge
    
    # do it only if the final labels were given
    if len(ytest):
        TBR.append(np.mean(ytest==np.argmax(ypred,1)+1))

    k=0
    while (k<numIterBCD):
        k=k+1
        if method=='sinkhorn':
            G = ot.sinkhorn(wa,wb,C,reg_sink)
        if method=='emd':
            G=  ot.emd(wa,wb,C)

        if k>1:
            sav_fcost.append(np.sum(G*Chinge))
            sav_totalcost.append(np.sum(G*(alpha*C0+Chinge)))

            
        Yst=ntest*G.T.dot((y+1)/2.)
        #Yst=ntest*G.T.dot(y_f)
        g.fit(Kt,Yst)
        ypred=g.predict(Kt)

        
        Chinge=classif.loss_hinge(y,ypred)
        #Chinge=SVMclassifier.loss_hinge(y_f*2-1,ypred*2-1)
        
        C=alpha*C0+Chinge

        if len(ytest):
            TBR1=np.mean(ytest==np.argmax(ypred,1)+1)
            TBR.append(TBR1)
            

    results['ypred']=np.argmax(ypred,1)+1
    if len(ytest):
        results['TBR']=TBR

    results['clf']=g
    results['G']=G
    results['fcost']=sav_fcost
    results['totalcost']=sav_totalcost
    return g,results
Example #40
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.legend(loc=0)
pl.title('Source and target distributions')

pl.figure(2)
pl.imshow(M, interpolation='nearest')
pl.title('Cost matrix M')

##############################################################################
# Compute EMD
# -----------

#%% EMD

G0 = ot.emd(a, b, M)

pl.figure(3)
pl.imshow(G0, interpolation='nearest')
pl.title('OT matrix G0')

pl.figure(4)
ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.5, .5, 1])
pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
pl.legend(loc=0)
pl.title('OT matrix with samples')


##############################################################################
# Compute Sinkhorn