def compute_local_rotations(sc, data, model, num_buckets):
    """
    Analogous to the function of the same name in lopq.model.

    :param SparkContext sc: a SparkContext
    :param RDD data: an RDD of numpy arrays
    :param KMeansModel model: a KMeansModel instance for which to fit local rotations
    :param int num_buckets: the number of subvectors over which to balance residual variance

    :returns: a tuple (R, mu, count) of fitted rotations, mean residuals,
        and per-cluster observation counts
    """
    # Accumulate per-cluster covariance statistics over the distributed data.
    accum_A, accum_mu, accum_count = accumulate_covariance_estimators(sc, data, model)

    # Convert the accumulated dicts into dense ndarrays keyed by cluster index.
    num_clusters = len(model.centers)
    accum_A = dict_to_ndarray(accum_A, num_clusters)
    accum_mu = dict_to_ndarray(accum_mu, num_clusters)
    accum_count = dict_to_ndarray(accum_count, num_clusters)

    # Fit the local rotations and mean residuals from the accumulators.
    R, mu = compute_rotations_from_accumulators(accum_A, accum_mu, accum_count, num_buckets)

    return R, mu, accum_count
def test_compute_rotations_from_accumulators():
    """Check compute_rotations_from_accumulators against pickled fixtures."""
    # Open the pickle fixtures in binary mode — pickle requires 'rb' on
    # Python 3 — and use context managers so the handles are always closed
    # (the original `pkl.load(open(...))` leaked the file objects).
    with open(relpath('./testdata/test_compute_rotations_from_accumulators_input.pkl'), 'rb') as f:
        A, mu, count, num_buckets = pkl.load(f)
    with open(relpath('./testdata/test_compute_rotations_from_accumulators_output.pkl'), 'rb') as f:
        expected = pkl.load(f)

    actual = compute_rotations_from_accumulators(A, mu, count, num_buckets)

    # Rotations
    assert_true(np.allclose(expected[0], actual[0]))

    # Mean residuals
    assert_true(np.allclose(expected[1], actual[1]))
def test_compute_rotations_from_accumulators():
    """Check compute_rotations_from_accumulators against pickled fixtures.

    NOTE(review): this duplicates the test of the same name defined earlier
    in this file; the later definition shadows the earlier one, so only one
    ever runs — consider removing one of the two.
    """
    # Open the pickle fixtures in binary mode — pickle requires 'rb' on
    # Python 3 — and use context managers so the handles are always closed
    # (the original `pkl.load(open(...))` leaked the file objects).
    with open(relpath('./testdata/test_compute_rotations_from_accumulators_input.pkl'), 'rb') as f:
        A, mu, count, num_buckets = pkl.load(f)
    with open(relpath('./testdata/test_compute_rotations_from_accumulators_output.pkl'), 'rb') as f:
        expected = pkl.load(f)

    actual = compute_rotations_from_accumulators(A, mu, count, num_buckets)

    # Rotations
    assert_true(np.allclose(expected[0], actual[0]))

    # Mean residuals
    assert_true(np.allclose(expected[1], actual[1]))