def test_minibatch_update_consistency():
    """Check that dense and sparse minibatch updates give the same results.

    One mini-batch step is run on the first 10 rows of ``X`` and on the
    first 10 rows of ``X_csr``, both starting from the same perturbed copy
    of ``centers`` (all three names are defined outside this function).
    Each run must lower the inertia measured on the batch and report an
    incremental squared difference that matches the observed center shift;
    the two runs must then agree with each other.
    """
    rng = np.random.RandomState(42)
    old_centers = centers + rng.normal(size=centers.shape)

    # Squared norms are computed on the full data first, then restricted
    # to the small minibatch.
    dense_batch = X[:10]
    sparse_batch = X_csr[:10]
    dense_sq_norms = (X ** 2).sum(axis=1)[:10]
    sparse_sq_norms = row_norms(X_csr, squared=True)[:10]
    sample_weight_mb = np.ones(dense_batch.shape[0], dtype=np.double)

    def run_step(batch, batch_sq_norms):
        # Every run works on its own copy of the starting centers and on
        # its own zeroed weight sums and scratch buffer.
        step_centers = old_centers.copy()
        step_weight_sums = np.zeros(step_centers.shape[0], dtype=np.double)
        step_buffer = np.zeros(centers.shape[1], dtype=np.double)

        inertia_before, reported_diff = _mini_batch_step(
            batch,
            sample_weight_mb,
            batch_sq_norms,
            step_centers,
            step_weight_sums,
            step_buffer,
            1,
            None,
            random_reassign=False,
        )
        assert inertia_before > 0.0

        # compute the new inertia on the same batch to check that it
        # decreased
        batch_labels, inertia_after = _labels_inertia(
            batch, sample_weight_mb, batch_sq_norms, step_centers
        )
        assert inertia_after > 0.0
        assert inertia_after < inertia_before

        # check that the incremental difference computation is matching
        # the final observed value
        observed_diff = np.sum((step_centers - old_centers) ** 2)
        assert_almost_equal(reported_diff, observed_diff)

        return (
            batch_labels,
            step_centers,
            reported_diff,
            inertia_before,
            inertia_after,
        )

    # step 1: compute the dense minibatch update
    (
        labels,
        new_centers,
        incremental_diff,
        old_inertia,
        new_inertia,
    ) = run_step(dense_batch, dense_sq_norms)

    # step 2: compute the sparse minibatch update
    (
        labels_csr,
        new_centers_csr,
        incremental_diff_csr,
        old_inertia_csr,
        new_inertia_csr,
    ) = run_step(sparse_batch, sparse_sq_norms)

    # step 3: check that sparse and dense updates lead to the same results
    assert_array_equal(labels, labels_csr)
    assert_array_almost_equal(new_centers, new_centers_csr)
    assert_almost_equal(incremental_diff, incremental_diff_csr)
    assert_almost_equal(old_inertia, old_inertia_csr)
    assert_almost_equal(new_inertia, new_inertia_csr)
def test_minibatch_update_consistency():
    """Check that dense and sparse minibatch updates give the same results.

    One mini-batch step is run on the first 10 rows of ``X`` and on the
    first 10 rows of ``X_csr``, both starting from the same perturbed copy
    of ``centers`` (all three names are defined outside this function).
    Each run must lower the inertia measured on the batch, and the two
    runs must agree on labels, updated centers and both inertia values.
    """
    rng = np.random.RandomState(42)
    centers_old = centers + rng.normal(size=centers.shape)

    # Squared norms and sample weights are built for the full data first,
    # then restricted to the small minibatch.
    dense_batch = X[:10]
    sparse_batch = X_csr[:10]
    dense_sq_norms = (X**2).sum(axis=1)[:10]
    sparse_sq_norms = row_norms(X_csr, squared=True)[:10]
    sample_weight_mb = np.ones(X.shape[0], dtype=X.dtype)[:10]

    def run_step(batch, batch_sq_norms):
        # Every run gets its own starting centers, its own zeroed output
        # centers and weight sums, and a freshly seeded random state.
        start_centers = centers_old.copy()
        updated_centers = np.zeros_like(start_centers)
        step_weight_sums = np.zeros(start_centers.shape[0], dtype=X.dtype)

        inertia_before = _mini_batch_step(
            batch,
            batch_sq_norms,
            sample_weight_mb,
            start_centers,
            updated_centers,
            step_weight_sums,
            np.random.RandomState(0),
            random_reassign=False,
        )
        assert inertia_before > 0.0

        # compute the new inertia on the same batch to check that it
        # decreased
        batch_labels, inertia_after = _labels_inertia(
            batch, sample_weight_mb, batch_sq_norms, updated_centers
        )
        assert inertia_after > 0.0
        assert inertia_after < inertia_before

        return batch_labels, updated_centers, inertia_before, inertia_after

    # step 1: compute the dense minibatch update
    labels, centers_new, old_inertia, new_inertia = run_step(
        dense_batch, dense_sq_norms
    )

    # step 2: compute the sparse minibatch update
    labels_csr, centers_new_csr, old_inertia_csr, new_inertia_csr = run_step(
        sparse_batch, sparse_sq_norms
    )

    # step 3: check that sparse and dense updates lead to the same results
    assert_array_equal(labels, labels_csr)
    assert_allclose(centers_new, centers_new_csr)
    assert_allclose(old_inertia, old_inertia_csr)
    assert_allclose(new_inertia, new_inertia_csr)