import numpy as np
import ot
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d_with_weights():
    # test that emd_1d gives similar results as emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    w_u = rng.uniform(0., 1., n)
    w_u = w_u / w_u.sum()
    w_v = rng.uniform(0., 1., m)
    w_v = w_v / w_v.sum()

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd(w_u, w_v, M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)), w_u, w_v)
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(w_u, G.sum(1))
    np.testing.assert_allclose(w_v, G.sum(0))
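The weighted problem that emd_1d solves has a closed form via quantile functions: W_2^2(u, v) = integral over q in [0, 1] of (F_u^{-1}(q) - F_v^{-1}(q))^2 dq. Below is a minimal NumPy sketch of that formula; the helper quantile_w2_sq is illustrative and not part of POT. On the samples above, quantile_w2_sq(u.ravel(), v.ravel(), w_u, w_v) should match wass1d_emd2 up to floating-point tolerance.

import numpy as np


def quantile_w2_sq(u, v, w_u, w_v):
    # sort supports and build the empirical CDFs
    iu, iv = np.argsort(u), np.argsort(v)
    u, w_u = u[iu], w_u[iu]
    v, w_v = v[iv], w_v[iv]
    cu, cv = np.cumsum(w_u), np.cumsum(w_v)
    # split [0, 1] at every level where either quantile function jumps
    qs = np.union1d(cu, cv)
    # both quantile functions are piecewise constant between these levels
    fu = u[np.minimum(np.searchsorted(cu, qs, side='left'), len(u) - 1)]
    fv = v[np.minimum(np.searchsorted(cv, qs, side='left'), len(v) - 1)]
    widths = np.diff(np.concatenate(([0.0], qs)))
    return np.sum(widths * (fu - fv) ** 2)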
import numpy as np
import tensorflow as tf
import ot


def test_emd1d_device_tf():
    nx = ot.backend.TensorflowBackend()
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    # Check that everything stays on the CPU
    with tf.device("/CPU:0"):
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)

    if len(tf.config.list_physical_devices('GPU')) > 0:
        # Check that everything happens on the GPU
        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v)
        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)
        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
        assert nx.dtype_device(emd)[1].startswith("GPU")
def unique_local_minimum(means_star, alpha, algo):
    # `samples`, `sinkhorn_em_algorithm_single` and `em_algorithm_single`
    # are defined elsewhere in the experiment script
    weights = np.array([alpha, 1 - alpha])
    for mu_0 in np.linspace(-10 * means_star, 10 * means_star, 10):
        # mu_0 = np.array([mu_0, -mu_0])
        if algo == "sEM":
            means_em, weights_sinkhorn, seq, weights_list = sinkhorn_em_algorithm_single(
                samples=samples,
                mu0=mu_0,
                sigma=1,
                log_theta0=np.log(weights).reshape(2, 1),
                n_iter=10,
                n_iter_sinkhorn=20)
            thetaseq = [
                np.array(weights).reshape(2, 1) for i in range(len(seq))
            ]
        else:
            means_em, thetaseq, seq, weights_list = em_algorithm_single(
                samples=samples,
                mu0=mu_0,
                sigma=1,
                log_theta0=np.log(weights).reshape(2, 1),
                n_iter=10,
                update_theta=(algo == "oEM"))
        # 1-D Wasserstein distance between the estimated and true means,
        # weighted by the (final) mixture weights
        error = ot.emd2_1d(means_em[:, 0], means_star[:, 0],
                           thetaseq[-1].reshape(-1), weights.reshape(-1))
        if error > 1:
            return False
    return True
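Using ot.emd2_1d as the error here compares the estimated and true component means as weighted point sets, which makes the error invariant to the order in which EM happens to recover the components. A tiny illustration with hypothetical values:

import numpy as np
import ot

mu_hat = np.array([2.1, -1.9])    # components recovered in swapped order
mu_star = np.array([-2.0, 2.0])
w = np.array([0.5, 0.5])
# small (0.01) despite the label swap, unlike a naive paired squared error
print(ot.emd2_1d(mu_hat, mu_star, w, w))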
def __call__(
    self,
    x: dict,
    x_0: dict,
    t: int = None,
    par: dict = None,
) -> float:
    # compute summary statistics, shape (n, dim), (n0, dim)
    s, s0 = self.sumstat(x), self.sumstat(x_0)
    n, n0 = s.shape[0], s0.shape[0]
    dim, dim0 = s.shape[1], s0.shape[1]
    if dim != dim0:
        raise ValueError(f"Sumstat dimensions do not match: {dim}!={dim0}")

    # unit sphere samples for Monte-Carlo approximation,
    # shape (n_proj, dim)
    sphere_samples = uniform_unit_sphere_samples(
        n_proj=self.n_proj, dim=dim, seed=self.seed,
    )

    # 1d linear projections, shape (n_proj, {n, n0})
    s_projs = np.dot(sphere_samples, s.T)
    s0_projs = np.dot(sphere_samples, s0.T)

    # weights (could also be passed/learned?)
    w, w0 = np.ones((n,)) / n, np.ones((n0,)) / n0

    # approximate integral over sphere via Monte-Carlo samples
    cost = 0.0
    for s_proj, s0_proj in zip(s_projs, s0_projs):
        # calculate optimal 1d earth mover's distance
        # this is computationally O(n*log(n)) efficient via simple sorting
        cost += ot.emd2_1d(
            x_a=s_proj,
            x_b=s0_proj,
            a=w,
            b=w0,
            metric=self.metric,
            p=self.p,
            log=False,
            **self.emd_1d_args,
        )
    cost /= self.n_proj

    # take root to match Wasserstein distance definition
    if self.p < np.inf:
        cost = cost ** (1 / self.p)
    return cost
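The loop above is the standard Monte-Carlo estimator of the sliced Wasserstein distance. A self-contained sketch of the same estimator on placeholder data, cross-checked against POT's built-in ot.sliced_wasserstein_distance; the two should agree up to Monte-Carlo error since they draw different projections:

import numpy as np
import ot

rng = np.random.RandomState(0)
s, s0 = rng.randn(50, 3), rng.randn(60, 3)   # placeholder summary statistics
n_proj = 200

# random directions on the unit sphere, shape (n_proj, dim)
dirs = rng.randn(n_proj, 3)
dirs /= np.linalg.norm(dirs, axis=1, keepdims=True)

# average the exact 1-D squared Wasserstein distance over projections
cost = np.mean([ot.emd2_1d(d @ s.T, d @ s0.T, metric='sqeuclidean')
                for d in dirs])
swd = cost ** 0.5  # take the root for p = 2

# POT's built-in estimator should agree up to Monte-Carlo error
print(swd, ot.sliced_wasserstein_distance(s, s0, n_projections=n_proj, seed=1))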
import numpy as np
import ot


def test_1d_sliced_equals_emd():
    n = 100
    m = 120
    rng = np.random.RandomState(0)

    x = rng.randn(n, 1)
    a = rng.uniform(0, 1, n)
    a /= a.sum()

    y = rng.randn(m, 1)
    u = ot.utils.unif(m)

    res = ot.sliced_wasserstein_distance(x, y, a, u, 10, seed=42)
    expected = ot.emd2_1d(x.squeeze(), y.squeeze(), a, u)
    # for 1-D data every projection equals the data up to sign, so the
    # squared sliced distance (p=2) equals the exact squared Wasserstein
    np.testing.assert_almost_equal(res**2, expected)
import numpy as np
import pytest
import ot
from scipy.stats import wasserstein_distance


def test_emd_1d_emd2_1d():
    # test that emd_1d gives similar results as emd
    n = 20
    m = 30
    rng = np.random.RandomState(0)
    u = rng.randn(n, 1)
    v = rng.randn(m, 1)

    M = ot.dist(u, v, metric='sqeuclidean')

    G, log = ot.emd([], [], M, log=True)
    wass = log["cost"]
    G_1d, log = ot.emd_1d(u, v, [], [], metric='sqeuclidean', log=True)
    wass1d = log["cost"]
    wass1d_emd2 = ot.emd2_1d(u, v, [], [], metric='sqeuclidean', log=False)
    wass1d_euc = ot.emd2_1d(u, v, [], [], metric='euclidean', log=False)

    # check loss is similar
    np.testing.assert_allclose(wass, wass1d)
    np.testing.assert_allclose(wass, wass1d_emd2)

    # check loss is similar to scipy's implementation for Euclidean metric
    wass_sp = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)))
    np.testing.assert_allclose(wass_sp, wass1d_euc)

    # check constraints
    np.testing.assert_allclose(np.ones((n,)) / n, G.sum(1))
    np.testing.assert_allclose(np.ones((m,)) / m, G.sum(0))

    # check G is similar
    np.testing.assert_allclose(G, G_1d)

    # check AssertionError is raised if called on non 1d arrays
    u = rng.randn(n, 2)
    v = rng.randn(m, 2)
    with pytest.raises(AssertionError):
        ot.emd_1d(u, v, [], [])
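A quick way to see why emd_1d can rely on sorting: with equal sample sizes and uniform weights the optimal 1-D plan simply pairs sorted samples, so the squared 2-Wasserstein distance is the mean squared gap between order statistics. An illustrative check (not part of the test suite):

import numpy as np
import ot

rng = np.random.RandomState(0)
x, y = rng.randn(20), rng.randn(20)   # equal sizes, uniform weights
w2_sq = np.mean((np.sort(x) - np.sort(y)) ** 2)
np.testing.assert_allclose(w2_sq, ot.emd2_1d(x, y, metric='sqeuclidean'))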
import numpy as np
import ot


def test_emd1d_type_devices(nx):
    # `nx` is a pytest fixture providing a POT backend to test against
    rng = np.random.RandomState(0)
    n = 10
    x = np.linspace(0, 5, n)
    rho_u = np.abs(rng.randn(n))
    rho_u /= rho_u.sum()
    rho_v = np.abs(rng.randn(n))
    rho_v /= rho_v.sum()

    for tp in nx.__type_list__:
        print(nx.dtype_device(tp))

        xb, rho_ub, rho_vb = nx.from_numpy(x, rho_u, rho_v, type_as=tp)

        emd = ot.emd_1d(xb, xb, rho_ub, rho_vb)
        emd2 = ot.emd2_1d(xb, xb, rho_ub, rho_vb)

        nx.assert_same_dtype_device(xb, emd)
        nx.assert_same_dtype_device(xb, emd2)
# `samples`, `weights`, `means`, `error` and the EM helpers are defined
# earlier in the experiment script
means_em, weights_sinkhorn, seq, weights_list = sinkhorn_em_algorithm_single(
    samples=samples,
    mu0=np.array([np.array([-2, 0]), np.array([2, 0])]),
    sigma=1,
    log_theta0=np.array([np.log(weights[0]),
                         np.log(weights[1])]).reshape(2, 1),
    n_iter=1,
    n_iter_sinkhorn=50)
thetaseq = [np.array(weights).reshape(2, 1) for i in range(len(seq))]

print(means_em)
print(means)
print(error(means_em, means, weights))
print(weights)
# 1-D Wasserstein distance between the estimated and true means
print(
    ot.emd2_1d(means_em[:, 0], means[:, 0], thetaseq[-1].reshape(-1),
               weights.reshape(-1)))

animate = False
if animate:
    fig = plt.figure()
    camera = celluloid.Camera(fig)
    cm = "viridis"
    # initial setup: color each sample by its probability of the first mode
    proba_mode = (weights_list[0][0] /
                  (weights_list[0][0] + weights_list[0][1]))
    plt.scatter(x=samples[:, 0], y=samples[:, 1], c=proba_mode, cmap=cm)
cor = 0
for i, (imgs, labels) in enumerate(dataloader):
    real_imgs = Variable(imgs.type(FloatTensor))
    batch_size = real_imgs.shape[0]
    encodings = encoder1(real_imgs)

    src = np.random.randint(0, batch_size, size=opt.num_paths)
    dest = np.random.randint(0, batch_size, size=opt.num_paths)

    sink_dist1 = sinkhorn1(real_imgs[src], real_imgs[dest]).to("cuda")
    sink_dist1[src == dest] = 0
    # sink_dist2 = sinkhorn2(real_imgs[src], real_imgs[dest]).to("cuda")
    # sink_dist2[src == dest] = 0

    # use `j` for the inner loop to avoid shadowing the batch index `i`
    for j in range(opt.num_paths):
        if src[j] != dest[j]:
            # 1-D EMD between raw pixel values; assumes images are
            # flattened to 1-D vectors upstream
            wass1.append(
                ot.emd2_1d(real_imgs[src[j]].cpu().numpy(),
                           real_imgs[dest[j]].cpu().numpy(),
                           metric='euclidean'))

    euc_dist = torch.norm(encodings[src] - encodings[dest], p=2, dim=1)

    sink1.extend(sink_dist1[src != dest].cpu().detach().numpy())
    # sink2.extend(sink_dist2[src != dest].cpu().detach().numpy())
    euc.extend(euc_dist[src != dest].cpu().detach().numpy())
    # corr, _ = pearsonr(sink_dist.cpu().detach().numpy(),
    #                    euc_dist.cpu().detach().numpy())
    # cor += corr

corr1, _ = pearsonr(sink1, euc)
# corr2, _ = pearsonr(sink2, euc)
corr2, _ = pearsonr(wass1, euc)
print(corr1, corr2)

plt.figure()
plt.scatter(sink1, euc)
plt.xlabel("Sinkhorn Distance")