def _permutation_2sided_2trans_flipflop(
    n: np.ndarray,
    m: np.ndarray,
    tol: float,
    max_iter: int,
    p0: Optional[np.ndarray] = None,
    q0: Optional[np.ndarray] = None,
):
    r"""Solve two-sided permutation Procrustes with two transformations by flip-flop iteration.

    Looks for permutation matrices P and Q that reduce :math:`\|PNQ - M\|_F^2` by alternately
    updating one of them with a one-sided permutation Procrustes step (Hungarian algorithm)
    while the other one is kept fixed. The scheme is taken from page 64 of "Parallel Solution
    of SVD-Related Problems, With Applications", Pythagoras Papadimitriou, University of
    Manchester, 1993.

    Two independent runs are performed. The first starts from a guess for P (and derives Q from
    it), the second starts from a guess for Q (and derives P from it). The pair with the lowest
    error is returned; on a tie the result of the first run is returned.

    Parameters
    ----------
    n : ndarray
        The array to be permuted.
    m : ndarray
        The reference array.
    tol : float
        A run stops as soon as its error is not larger than this value.
    max_iter : int
        Maximum number of flip-flop iterations performed in each run.
    p0 : ndarray, optional
        Initial guess of P for the first run. If None, the identity matrix of size
        ``m.shape[0]`` is used.
    q0 : ndarray, optional
        Initial guess of Q for the second run. If None, the identity matrix of size
        ``m.shape[1]`` is used.

    Returns
    -------
    p : ndarray
        The left-hand-side permutation matrix of the better run.
    q : ndarray
        The right-hand-side permutation matrix of the better run.
    error : float
        The value of ``compute_error(n, m, q, p)`` for the returned pair.

    """
    # ---- 1st case: start from P1 and derive Q1 ----
    # initial guesses: set P1 to identity if guess P0 is not given, and compute Q1 using 1-sided
    # permutation procrustes where A=(P1N), B=M, & cost = A.T B
    p1 = p0
    if p1 is None:
        p1 = np.eye(m.shape[0])
    q1 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p1.T), m))
    # compute initial error1 = |(P1)N(Q1) - M|
    error1 = compute_error(n, m, q1, p1)
    step = 0
    while error1 > tol and step < max_iter:
        # update P1 using 1-sided permutation procrustes where A=(NQ1).T, B=M.T, & cost = A.T B
        # 1-sided procrustes finds the right-hand-side transformation T, so to solve for P1, one
        # needs to minimize |Q.T N.T P.T - M.T| which is the same as original objective function.
        p1 = _compute_permutation_hungarian(np.dot(np.dot(n, q1), m.T)).T
        # update Q1 using 1-sided permutation procrustes where A=(P1N), B=M, & cost = A.T B
        q1 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p1.T), m))
        error1 = compute_error(n, m, q1, p1)
        step += 1
    if step == max_iter:
        print(f"Maximum iterations reached in 1st case of flip-flop! error={error1} & tol={tol}")

    # ---- 2nd case: start from Q2 and derive P2 ----
    # initial guesses: set Q2 to identity if guess Q0 is not given, and compute P2 using 1-sided
    # permutation procrustes where A=(NQ2).T, B=M.T, & cost = A.T B
    q2 = q0
    if q2 is None:
        q2 = np.eye(m.shape[1])
    p2 = _compute_permutation_hungarian(np.dot(np.dot(n, q2), m.T)).T
    # compute initial error2 = |(P2)N(Q2) - M|
    error2 = compute_error(n, m, q2, p2)
    step = 0
    while error2 > tol and step < max_iter:
        # update Q2 using 1-sided permutation procrustes where A=(P2N), B=M, & cost = A.T B
        q2 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p2.T), m))
        # update P2 using 1-sided permutation procrustes where A=(NQ2).T, B=M.T, & cost = A.T B
        p2 = _compute_permutation_hungarian(np.dot(np.dot(n, q2), m.T)).T
        error2 = compute_error(n, m, q2, p2)
        step += 1
    if step == max_iter:
        # report the error of THIS run (error2); reporting error1 here would be misleading
        print(f"Maximum iterations reached in 2nd case of flip-flop! error={error2} & tol={tol}")

    # return permutations corresponding to the lowest error
    if error1 <= error2:
        return p1, q1, error1
    return p2, q2, error2
def test_kopt_heuristic_single_identity(m):
    r"""Check that the single k-opt search returns the identity when B equals A."""
    # random square matrix used as both the array and its reference
    mat = np.random.uniform(-2.0, 2.0, (m, m))

    def objective(x):
        # error of matching mat against itself with the candidate permutation x
        return compute_error(mat, mat, x, x.T)

    # first pass: the initial guess is already the identity
    guess = np.eye(m)
    found, err = kopt_heuristic_single(objective, guess, k=2)
    assert_equal(found, np.eye(m))
    assert_equal(err, 0.0)

    # second pass: same guess array with rows m-2 and -1 exchanged in place
    guess[[m - 2, -1]] = guess[[-1, m - 2]]
    found, err = kopt_heuristic_single(objective, guess, k=2)
    assert_equal(found, np.eye(m))
    assert_equal(err, 0.0)
def test_kopt_heuristic_double():
    r"""Test double sided k-opt heuristic search algorithm.

    Builds ``arr_a = perm1.T @ arr_b @ perm2`` from a seeded random integer-valued array, then
    starts the search from shuffled versions of both permutation matrices and checks that two
    consecutive k-opt passes (k=4, then k=3) bring the error down to zero.
    """
    np.random.seed(998)
    # use the builtin float: the np.float alias was deprecated in NumPy 1.20 and removed in 1.24
    arr_b = np.random.randint(low=-10, high=10, size=(4, 3)).astype(float)
    perm1 = np.array([[0., 0., 0., 1.],
                      [0., 1., 0., 0.],
                      [1., 0., 0., 0.],
                      [0., 0., 1., 0.]])
    perm2 = np.array([[0., 0., 1.],
                      [1., 0., 0.],
                      [0., 1., 0.]])
    arr_a = np.linalg.multi_dot([perm1.T, arr_b, perm2])
    # shuffle the permutation matrices
    perm1_shuff = np.array([[0., 0., 0., 1.],
                            [1., 0., 0., 0.],
                            [0., 1., 0., 0.],
                            [0., 0., 1., 0.]])
    perm2_shuff = np.array([[1., 0., 0.],
                            [0., 0., 1.],
                            [0., 1., 0.]])
    # error of the shuffled starting point, used as the reference error for both passes
    error = compute_error(arr_b, arr_a, perm1_shuff.T, perm2_shuff)
    # first pass with k=4 starting from the shuffled guesses
    perm_left, perm_right, kopt_error = kopt_heuristic_double(
        perm_p=perm1_shuff, perm_q=perm2_shuff,
        array_m=arr_a, array_n=arr_b,
        ref_error=error, kopt_k=4, kopt_tol=1.e-8)
    # second pass with k=3 starting from the result of the first pass
    _, _, kopt_error = kopt_heuristic_double(
        perm_p=perm_left, perm_q=perm_right,
        array_m=arr_a, array_n=arr_b,
        ref_error=error, kopt_k=3, kopt_tol=1.e-8)
    assert kopt_error <= error
    assert kopt_error == 0
def test_kopt_heuristic_single():
    r"""Check the single k-opt search on a fixed 5x5 example and its argument validation."""
    arr_a = np.array([
        [3, 6, 1, 0, 7],
        [4, 5, 2, 7, 6],
        [8, 6, 6, 1, 7],
        [4, 4, 7, 9, 4],
        [4, 8, 0, 3, 1],
    ])
    arr_b = np.array([
        [1, 8, 0, 4, 3],
        [6, 5, 2, 4, 7],
        [7, 6, 6, 8, 1],
        [7, 6, 1, 3, 0],
        [4, 4, 7, 4, 9],
    ])
    # permutation matrices written as row selections of the integer identity:
    # row i of each matrix has its single 1 in the listed column
    identity = np.eye(5, dtype=int)
    perm_guess = identity[[2, 0, 3, 4, 1]]
    perm_exact = identity[[3, 1, 2, 4, 0]]

    # error of the starting guess is the reference the search has to beat
    error_old = compute_error(arr_a, arr_b, perm_guess, perm_guess)
    perm, kopt_error = kopt_heuristic_single(arr_a, arr_b, error_old, perm_guess, 3,
                                             kopt_tol=1.e-8)
    assert_equal(perm, perm_exact)
    assert kopt_error == 0

    # a search order of 1 is rejected
    with assert_raises(ValueError):
        kopt_heuristic_single(arr_a, arr_b, error_old, perm_guess, 1, kopt_tol=1.e-8)
def test_kopt_heuristic_double_identity(m, n):
    r"""Check that the double k-opt search returns identities when B equals A."""
    # random rectangular matrix used as both the array and its reference
    mat = np.random.uniform(-6.0, 6.0, (m, n))

    def objective(x, y):
        # error of matching mat against itself with candidate permutations x (left) and y (right)
        return compute_error(mat, mat, y, x.T)

    left_guess = np.eye(m)
    right_guess = np.eye(n)

    # first pass: both guesses are already the identity
    perm1, perm2, error = kopt_heuristic_double(objective, left_guess, right_guess, 2)
    assert_equal(perm1, left_guess)
    assert_equal(perm2, right_guess)
    assert_equal(error, 0.0)

    # second pass: exchange rows m-4 & -1 of the left guess and rows 0 & -1 of the right guess
    left_guess[[m - 4, -1]] = left_guess[[-1, m - 4]]
    right_guess[[0, -1]] = right_guess[[-1, 0]]
    perm1, perm2, error = kopt_heuristic_double(objective, left_guess, right_guess, 2)
    assert_equal(perm1, np.eye(m))
    assert_equal(perm2, np.eye(n))
    assert_equal(error, 0.0)
def test_procrustes_rotation_translation():
    r"""Check orthogonal Procrustes on a rotated and reflected array, with optional centering."""
    array_a = np.array([[-7.3, 2.8], [-7.1, -0.2], [4.0, 1.4], [1.3, 0]])
    # reference: array_a rotated by 20 degrees and then reflected in the x-axis
    theta = 0.34907
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    reflection = np.array([[1, 0], [0, -1]])
    transform = np.dot(rotation, reflection)
    array_b = np.dot(array_a, transform)

    def residual(result):
        # error of the returned transformation applied to the returned arrays
        return compute_error(result["new_a"], result["new_b"], result["array_u"])

    # plain call: inputs come back untouched
    res = orthogonal(array_a, array_b)
    assert_almost_equal(res["new_a"], array_a, decimal=6)
    assert_almost_equal(res["new_b"], array_b, decimal=6)
    assert_almost_equal(res["array_u"], transform, decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)

    # with translation: inputs come back with their column means removed
    res = orthogonal(array_a, array_b, translate=True)
    assert_almost_equal(res["new_a"], array_a - np.mean(array_a, axis=0), decimal=6)
    assert_almost_equal(res["new_b"], array_b - np.mean(array_b, axis=0), decimal=6)
    assert_almost_equal(res["array_u"], transform, decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)

    # with translation and scaling: only the transformation and the error are checked
    res = orthogonal(array_a, array_b, translate=True, scale=True)
    assert_almost_equal(res["array_u"], transform, decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)
def test_procrustes_shifted():
    r"""Check orthogonal Procrustes with translation on arrays that differ only by a shift."""

    def residual(result):
        # error of the returned transformation applied to the returned arrays
        return compute_error(result["new_a"], result["new_b"], result["array_u"])

    # --- square (3 by 3) array ---
    square = np.array([[3.5, 0.1, 7.0], [0.5, 2.0, 1.0], [8.1, 0.3, 0.7]])

    # the same constant added to every entry
    res = orthogonal(square, square + 4.1, translate=True)
    assert_almost_equal(res["array_u"], np.eye(3), decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)

    # a different constant added to each column
    res = orthogonal(square, square + np.array([0, 3.2, 5.0]), translate=True)
    assert_almost_equal(res["array_u"], np.eye(3), decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)

    # --- rectangular (2 by 3) array ---
    rect = np.array([[1, 2, 3], [7, 9, 5]])

    # the same constant added to every entry; only the error is checked
    res = orthogonal(rect, rect + 0.71, translate=True)
    assert_almost_equal(residual(res), 0., decimal=6)

    # a different constant added to each column; the transformation must be 3 by 3
    res = orthogonal(rect, rect + np.array([0.3, 7.1, 4.2]), translate=True)
    assert_equal(res["array_u"].shape, (3, 3))
    assert_almost_equal(residual(res), 0., decimal=6)
def kopt_heuristic_single(array_a, array_b, ref_error, perm=None, kopt_k=3, kopt_tol=1.e-8):
    r"""K-opt heuristic to improve the accuracy for two-sided permutation with one transformation.

    Perform k-opt local search with every possible valid combination of the swapping mechanism:
    for every choice of ``kopt_k`` rows of the current permutation matrix, every non-trivial
    reordering of those rows is tried and kept whenever it lowers the error. The search stops
    as soon as an accepted improvement brings the error to ``kopt_tol`` or below.

    Parameters
    ----------
    array_a : ndarray
        The array to be permuted.
    array_b : ndarray
        The reference array.
    ref_error : float
        The reference error value, i.e. the error a candidate has to beat to be accepted.
    perm : ndarray, optional
        The permutation array which remains to be processed with k-opt local search. Default is
        the identity matrix whose size is the number of rows of array_a. The given array is not
        modified; candidates are built on copies.
    kopt_k : int, optional
        Defines the order of k-opt heuristic local search. For example, kopt_k=3 leads to a local
        search of 3 items and kopt_k=2 only searches for two items locally. Default=3.
    kopt_tol : float, optional
        Tolerance value to check if k-opt heuristic converges. Default=1.e-8.

    Returns
    -------
    perm : ndarray
        The permutation array after optimal heuristic search.
    kopt_error : float
        The error distance of two arrays with the updated permutation array. It equals
        ``ref_error`` when no candidate improved on it.

    Raises
    ------
    ValueError
        If ``kopt_k`` is smaller than 2.

    """
    if kopt_k < 2:
        raise ValueError("kopt_k must be an integer greater than or equal to 2.")
    # if perm is not specified, use the identity matrix as default
    if perm is None:
        perm = np.identity(np.shape(array_a)[0])
    num_row = perm.shape[0]
    kopt_error = ref_error
    # all the possible row-wise permutations
    for comb in it.combinations(range(num_row), r=kopt_k):
        rows = list(comb)
        for comb_perm in it.permutations(comb, r=kopt_k):
            # the identity reordering leaves the permutation matrix unchanged
            if comb_perm == comb:
                continue
            # candidate: current best with the selected rows reordered
            perm_kopt = perm.copy()
            perm_kopt[rows, :] = perm[list(comb_perm), :]
            e_kopt_new = compute_error(array_a, array_b, perm_kopt, perm_kopt)
            if e_kopt_new < kopt_error:
                perm = perm_kopt
                kopt_error = e_kopt_new
                # converged: leave BOTH loops instead of scanning the remaining combinations
                if kopt_error <= kopt_tol:
                    return perm, kopt_error
    return perm, kopt_error
def test_kopt_heuristic_single_all_permutations(m):
    r"""Check the single k-opt search when k equals the matrix size."""
    # random square matrix and a random row-shuffle of the identity
    mat = np.random.uniform(-10.0, 10.0, (m, m))
    target_perm = np.random.permutation(np.eye(m))
    # reference is B = P^T A P
    ref = np.linalg.multi_dot([target_perm.T, mat, target_perm])

    def objective(x):
        # error of matching mat against ref with the candidate permutation x
        return compute_error(mat, ref, x, x.T)

    # search from the identity with k=m and expect the exact permutation back
    found, err = kopt_heuristic_single(objective, np.eye(m), m)
    assert_equal(found, target_perm)
    assert_equal(err, 0.0)
def test_procrustes_reflection_square():
    r"""Check orthogonal Procrustes on reflected versions of a square array."""
    array_a = np.array([[2.0, 0.1], [0.5, 3.0]])

    def residual(result):
        # error of the returned transformation applied to the returned arrays
        return compute_error(result["new_a"], result["new_b"], result["array_u"])

    # reflection through the origin; here the returned arrays are checked as well
    array_b = -array_a
    res = orthogonal(array_a, array_b)
    assert_almost_equal(res["new_a"], array_a, decimal=6)
    assert_almost_equal(res["new_b"], array_b, decimal=6)
    assert_almost_equal(res["array_u"], np.array([[-1, 0], [0, -1]]), decimal=6)
    assert_almost_equal(residual(res), 0., decimal=6)

    # remaining reflections as (reference array, expected transformation) pairs
    reflections = (
        # reflection in the x-axis
        (np.array([[2.0, -0.1], [0.5, -3.0]]), np.array([[1, 0], [0, -1]])),
        # reflection in the y-axis
        (np.array([[-2.0, 0.1], [-0.5, 3.0]]), np.array([[-1, 0], [0, 1]])),
        # reflection in the line y=x
        (np.array([[0.1, 2.0], [3.0, 0.5]]), np.array([[0, 1], [1, 0]])),
    )
    for array_b, expected_u in reflections:
        res = orthogonal(array_a, array_b)
        assert_almost_equal(res["array_u"], expected_u, decimal=6)
        assert_almost_equal(residual(res), 0., decimal=6)
def test_kopt_heuristic_single_k_permutations(m):
    r"""Check the single k-opt search with k=2 on a permutation that is one swap from identity."""
    mat = np.random.uniform(-10.0, 10.0, (m, m))
    # target permutation: identity with rows m-3 & -1 exchanged (this makes sure the heuristic
    # algorithm only finds the solution towards the end of its search)
    target_perm = np.eye(m)
    target_perm[[m - 3, -1]] = target_perm[[-1, m - 3]]
    # reference is B = P^T A P
    ref = np.linalg.multi_dot([target_perm.T, mat, target_perm])

    def objective(x):
        # error of matching mat against ref with the candidate permutation x
        return compute_error(mat, ref, x, x.T)

    # search from the identity with pairwise swaps only
    found, err = kopt_heuristic_single(objective, np.eye(m), k=2)
    assert_equal(found, target_perm)
    assert_equal(err, 0.0)
def test_kopt_heuristic_double_all_permutations(m, n):
    r"""Check the double k-opt search when k equals the larger matrix dimension."""
    # random rectangular matrix and random row-shuffles of the two identities
    mat = np.random.uniform(-5.0, 5.0, (m, n))
    left_perm = np.random.permutation(np.eye(m))
    right_perm = np.random.permutation(np.eye(n))
    # reference is B = P1^T A P2
    ref = np.linalg.multi_dot([left_perm.T, mat, right_perm])

    def objective(x, y):
        # error of matching mat against ref with candidate permutations x (left) and y (right)
        return compute_error(mat, ref, y, x.T)

    # search from identities with k=max(n, m) and expect both permutations back exactly
    perm1, perm2, error = kopt_heuristic_double(objective, np.eye(m), np.eye(n), max(n, m))
    assert_equal(perm1, left_perm)
    assert_equal(perm2, right_perm)
    assert_equal(error, 0.0)
def test_procrustes_orthogonal_identical():
    r"""Check orthogonal Procrustes when the reference is a copy of the input array."""
    # (input array, expected shape of the transformation or None when it is not checked)
    cases = (
        # identical square arrays
        (np.arange(9).reshape(3, 3), None),
        # identical rectangular arrays (2 by 4)
        (np.array([[1, 5, 6, 7], [1, 2, 9, 4]]), (4, 4)),
        # identical rectangular arrays (5 by 3)
        (np.arange(15).reshape(5, 3), (3, 3)),
    )
    for array_a, expected_shape in cases:
        array_b = np.copy(array_a)
        res = orthogonal(array_a, array_b)
        # the arrays come back unchanged
        assert_almost_equal(res["new_a"], array_a, decimal=6)
        assert_almost_equal(res["new_b"], array_b, decimal=6)
        if expected_shape is not None:
            assert_equal(res["array_u"].shape, expected_shape)
        # the returned transformation maps new_a onto new_b
        residual = compute_error(res["new_a"], res["new_b"], res["array_u"])
        assert_almost_equal(residual, 0., decimal=6)
def test_kopt_heuristic_double_k_permutations(m, n):
    r"""Check the double k-opt search with k=2 on permutations close to the identity."""
    mat = np.random.uniform(-7.0, 7.0, (m, n))
    # left target: identity with rows m-2 & -1 exchanged
    left_perm = np.eye(m)
    left_perm[[m - 2, -1]] = left_perm[[-1, m - 2]]
    # right target: indices n-1 and -1 both address the last row, so this assignment leaves the
    # identity unchanged
    right_perm = np.eye(n)
    right_perm[[n - 1, -1]] = right_perm[[-1, n - 1]]
    # reference is B = P1^T A P2
    ref = np.linalg.multi_dot([left_perm.T, mat, right_perm])

    def objective(x, y):
        # error of matching mat against ref with candidate permutations x (left) and y (right)
        return compute_error(mat, ref, y, x.T)

    # search from identities with pairwise swaps only
    perm1, perm2, error = kopt_heuristic_double(objective, np.eye(m), np.eye(n), k=2)
    assert_equal(perm1, left_perm)
    assert_equal(perm2, right_perm)
    assert_equal(error, 0.0)
def test_rotation_translate_scale():
    r"""Check orthogonal Procrustes when the reference is scaled, shifted, rotated and reflected."""
    array_a = np.array([[5.1, 0], [-1.1, 4.8], [3.9, 7.3], [9.1, 6.3]])
    # transformation: rotation by 68 degrees followed by a reflection in the line y=x
    theta = 1.18682
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    reflection = np.array([[0, 1], [1, 0]])
    transform = np.dot(rotation, reflection)
    # reference: array_a scaled by 4 and shifted by 3 before being transformed
    array_b = np.dot(4 * array_a + 3.0, transform)

    # translation and scaling are needed to recover the pure orthogonal transformation
    res = orthogonal(array_a, array_b, translate=True, scale=True)
    assert_almost_equal(res["array_u"], transform, decimal=6)
    residual = compute_error(res["new_a"], res["new_b"], res["array_u"])
    assert_almost_equal(residual, 0., decimal=6)
def test_orthogonal_translate_scale2():
    r"""Check orthogonal Procrustes when the reference array is padded with zeros."""
    array_a = np.array([[1, 4], [7, 9]])
    # transformation: rotation by 90 degrees followed by a reflection in the x-axis
    theta = np.pi / 2
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rot = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
    ref = np.array([[1, 0], [0, -1]])
    trans = np.dot(rot, ref)
    # reference: transformed array_a in the top-left corner of a 7 by 7 array of zeros
    # (5 extra zero columns on the right and 5 extra zero rows at the bottom)
    array_b = np.zeros((7, 7))
    array_b[:2, :2] = np.dot(array_a, trans)

    # no translation or scaling is requested
    res = orthogonal(array_a, array_b, translate=False, scale=False)
    assert_almost_equal(res["array_u"], trans, decimal=6)
    residual = compute_error(res["new_a"], res["new_b"], res["array_u"])
    assert_almost_equal(residual, 0., decimal=6)
def test_procrustes_rotation_square():
    r"""Check orthogonal Procrustes on rotated versions of a square array."""
    array_a = np.arange(4).reshape(2, 2)

    def rotation_matrix(theta):
        # counter-clockwise 2D rotation matrix for the angle theta (in radians)
        return np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])

    rot_45 = 0.5 * np.sqrt(2) * np.array([[1, -1], [1, 1]])
    rot_30 = rotation_matrix(np.pi / 6)
    rot_72 = rotation_matrix(1.25664)

    # (reference array, expected transformation) pairs
    cases = (
        # rotation by 90 degree
        (np.array([[1, 0], [3, -2]]), np.array([[0., -1.], [1., 0.]])),
        # rotation by 180 degree
        (-array_a, np.array([[-1., 0.], [0., -1.]])),
        # rotation by 270 degree
        (np.array([[-1, 0], [-3, 2]]), np.array([[0., 1.], [-1., 0.]])),
        # rotation by 45 degree
        (np.dot(array_a, rot_45), rot_45),
        # rotation by 30 degree
        (np.dot(array_a, rot_30), rot_30),
        # rotation by 72 degree
        (np.dot(array_a, rot_72), rot_72),
    )
    for array_b, expected_u in cases:
        res = orthogonal(array_a, array_b)
        assert_almost_equal(res["array_u"], expected_u, decimal=6)
        residual = compute_error(res["new_a"], res["new_b"], res["array_u"])
        assert_almost_equal(residual, 0., decimal=6)
def permutation(array_a, array_b,
                remove_zero_col=True, remove_zero_row=True,
                pad_mode="row-col", translate=False, scale=False,
                check_finite=True, weight=None):
    r"""
    Single sided permutation Procrustes.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    remove_zero_col : bool, optional
        If True, the zero columns on the right side will be removed. Default=True.
    remove_zero_row : bool, optional
        If True, the zero rows on the top will be removed. Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

        - "row"
            The array with fewer rows is padded with zero rows so that both have the same
            number of rows.
        - "col"
            The array with fewer columns is padded with zero columns so that both have the
            same number of columns.
        - "row-col"
            The array with fewer rows is padded with zero rows, and the array with fewer
            columns is padded with zero columns, so that both have the same dimensions.
            This does not necessarily result in square arrays.
        - "square"
            The arrays are padded with zero rows and zero columns so that they are both
            squared arrays. The dimension of square array is specified based on the highest
            dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, i.e. columns of the
        arrays will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, i.e.
        :math:`Tr(A^T A) = 1`. Default=False.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs. Default=True.
    weight : ndarray
        The weighting matrix. Default=None.

    Returns
    -------
    res : ProcrustesResult
        Procrustes analysis result object with the entries:

        new_a : ndarray
            The transformed ndarray A.
        new_b : ndarray
            The transformed ndarray B.
        array_u : ndarray
            The optimum permutation transformation matrix.
        error : float
            One-sided permutation Procrustes error.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and reference :math:`\mathbf{B}_{n \times n}`
    find a permutation of the rows and/or columns of :math:`\mathbf{A}_{n \times n}` that makes
    it as close as possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                        \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                        \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2
       &= \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                        \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\left(\mathbf{A}\mathbf{P} - \mathbf{B} \right)^\dagger
                         \left(\mathbf{A}\mathbf{P} - \mathbf{B} \right)\right] \\
       &= \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                        \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]

    Here, :math:`\mathbf{P}_{n \times n}` is the permutation matrix. The solution is to relax
    the problem into a linear programming problem and note that the solution to a linear
    programming problem is always at the boundary of the allowed region, which means that the
    solution can always be written as a permutation matrix,

    .. math::
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                        \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \geq 0
                        \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\left(\mathbf{A}^\dagger\mathbf{B}\right) \right]

    This is a matching problem and can be solved by the Hungarian method. Note that if
    :math:`\mathbf{A}` and :math:`\mathbf{B}` have different numbers of items, you choose the
    larger matrix as :math:`\mathbf{B}` and then pad :math:`\mathbf{A}` with rows/columns of
    zeros.

    """
    # prepare the two arrays according to the requested options
    prepared_a, prepared_b = setup_input_arrays(
        array_a, array_b, remove_zero_col, remove_zero_row,
        pad_mode, translate, scale, check_finite, weight)

    # overlap matrix A.T B, whose trace with P.T is to be maximized
    overlap = np.dot(prepared_a.T, prepared_b)
    # linear_sum_assignment minimizes its cost, so subtract the overlap from its largest entry
    cost = np.max(overlap) - overlap
    assigned_rows, assigned_cols = linear_sum_assignment(cost)

    # permutation matrix with ones at the assigned (row, column) positions
    array_u = np.zeros(overlap.shape)
    array_u[assigned_rows, assigned_cols] = 1

    error = compute_error(prepared_a, prepared_b, array_u)
    return ProcrustesResult(new_a=prepared_a, new_b=prepared_b, array_u=array_u, error=error)
def permutation(
    a,
    b,
    pad=True,
    translate=False,
    scale=False,
    unpad_col=False,
    unpad_row=False,
    check_finite=True,
    weight=None,
):
    r"""Perform one-sided permutation Procrustes.

    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix
    :math:`\mathbf{B}_{m \times n}`, find the permutation transformation matrix
    :math:`\mathbf{P}_{n \times n}` that makes :math:`\mathbf{AP}` as close as possible to
    :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape, which is guaranteed with the default ``pad=True`` argument for any
    given :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. In preparing the
    :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices, the (optional) order of operations is:
    **1)** unpad zero rows/columns, **2)** translate the matrices to the origin, **3)** weight
    entries of :math:`\mathbf{A}`, **4)** scale the matrices to have unit norm, **5)** pad
    matrices with zero rows/columns so they have the same shape.

    If, after this preparation, either matrix has fewer rows than columns, both matrices are
    additionally zero-padded to square shape before the permutation is computed.

    Parameters
    ----------
    a : ndarray
        The 2d-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2d-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines
        the elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object. It
        carries the prepared arrays (``new_a``, ``new_b``), the permutation matrix (``t``) and
        the one-sided error (``error``).

    Notes
    -----
    The optimal :math:`n \times n` permutation matrix is obtained by,

    .. math::
       \mathbf{P}^{\text{opt}} =
       \arg \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]

    The solution is found by relaxing the problem into a linear programming problem. The
    solution to a linear programming problem is always at the boundary of the allowed region.
    So,

    .. math::
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \geq 0
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \text{Tr}\left[\mathbf{P}^\dagger\left(\mathbf{A}^\dagger\mathbf{B}\right) \right]

    This is a matching problem and can be solved by the Hungarian algorithm. The cost matrix is
    defined as :math:`\mathbf{A}^\dagger\mathbf{B}` and the
    `scipy.optimize.linear_sum_assignment` is used to solve for the permutation that maximizes
    the linear sum assignment problem.

    """
    # prepare the two matrices according to the requested options
    mat_a, mat_b = setup_input_arrays(
        a, b, unpad_col, unpad_row, pad, translate, scale, check_finite, weight
    )

    # a matrix with fewer rows than columns forces both matrices to be padded to square shape
    if any(mat.shape[0] < mat.shape[1] for mat in (mat_a, mat_b)):
        mat_a, mat_b = _zero_padding(mat_a, mat_b, "square")

    # Hungarian algorithm on the cost matrix C = A.T B gives the permutation matrix
    perm = _compute_permutation_hungarian(np.dot(mat_a.T, mat_b))

    # one-sided permutation error of the prepared matrices
    error = compute_error(mat_a, mat_b, perm)
    return ProcrustesResult(new_a=mat_a, new_b=mat_b, t=perm, error=error)
def orthogonal(
    a: np.ndarray,
    b: np.ndarray,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
    lapack_driver: str = "gesvd",
) -> ProcrustesResult:
    r"""Perform orthogonal Procrustes.

    Given a matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix
    :math:`\mathbf{B}_{m \times n}`, find the orthogonal transformation matrix
    :math:`\mathbf{Q}_{n \times n}` that makes :math:`\mathbf{AQ}` as close as possible to
    :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{Q} | \mathbf{Q}^{-1} = {\mathbf{Q}}^\dagger \right\}}
                          \|\mathbf{A}\mathbf{Q} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape, which is guaranteed with the default ``pad`` argument for any given
    :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. In preparing the :math:`\mathbf{A}` and
    :math:`\mathbf{B}` matrices, the (optional) order of operations is: **1)** unpad zero
    rows/columns, **2)** translate the matrices to the origin, **3)** weight entries of
    :math:`\mathbf{A}`, **4)** scale the matrices to have unit norm, **5)** pad matrices with
    zero rows/columns so they have the same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the
        initial :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines
        the elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {'gesvd', 'gesdd'}, optional
        Whether to use the more efficient divide-and-conquer approach ('gesdd') or the more
        robust general rectangular approach ('gesvd') to compute the singular-value
        decomposition with `scipy.linalg.svd`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object.

    Raises
    ------
    ValueError
        If the prepared :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices do not have the same
        shape.

    Notes
    -----
    The optimal orthogonal matrix is obtained by,

    .. math::
       \mathbf{Q}^{\text{opt}} =
       \arg \underbrace{\min}_{\left\{\mathbf{Q} \left| {\mathbf{Q}^{-1} = {\mathbf{Q}}^\dagger}
                                   \right. \right\}}
            \|\mathbf{A}\mathbf{Q} - \mathbf{B}\|_{F}^2 =
       \arg \underbrace{\max}_{\left\{\mathbf{Q} \left| {\mathbf{Q}^{-1} = {\mathbf{Q}}^\dagger}
                                   \right. \right\}}
            \text{Tr}\left[\mathbf{Q^\dagger}\mathbf{A^\dagger}\mathbf{B}\right]

    The solution is obtained using the singular value decomposition (SVD) of the
    :math:`\mathbf{A}^\dagger \mathbf{B}` matrix,

    .. math::
       \mathbf{A}^\dagger \mathbf{B} &= \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                          \tilde{\mathbf{V}}^{\dagger} \\
       \mathbf{Q}^{\text{opt}} &= \tilde{\mathbf{U}} \tilde{\mathbf{V}}^{\dagger}

    The singular values are always listed in decreasing order, with the smallest singular
    value in the bottom-right-hand corner of :math:`\tilde{\mathbf{\Sigma}}`.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import ortho_group
    >>> from procrustes import orthogonal
    >>> a = np.random.rand(5, 3)   # random input matrix
    >>> q = ortho_group.rvs(3)     # random orthogonal transformation
    >>> b = np.dot(a, q) + np.random.rand(1, 3)   # random target matrix
    >>> result = orthogonal(a, b, translate=True, scale=False)
    >>> print(result.error)      # error (should be zero)
    >>> print(result.t)          # transformation matrix (same as q)
    >>> print(result.new_a)      # translated array a
    >>> print(result.new_b)      # translated array b

    """
    # prepare the two matrices according to the requested options
    mat_a, mat_b = setup_input_arrays(
        a, b, unpad_col, unpad_row, pad, translate, scale, check_finite, weight
    )
    # the SVD-based solution below needs both matrices to have the same shape
    if mat_a.shape != mat_b.shape:
        raise ValueError(
            f"Shape of A and B does not match: {mat_a.shape} != {mat_b.shape} "
            "Check pad, unpad_col, and unpad_row arguments."
        )

    # singular vectors of A.T * B; the singular values themselves are not needed
    overlap = np.dot(mat_a.T, mat_b)
    left_vecs, _, right_vecs_t = scipy.linalg.svd(overlap, lapack_driver=lapack_driver)

    # optimal orthogonal transformation is the product of the two singular-vector matrices
    q_opt = np.dot(left_vecs, right_vecs_t)

    # one-sided error of the prepared matrices under the optimal transformation
    error = compute_error(mat_a, mat_b, q_opt)
    return ProcrustesResult(error=error, new_a=mat_a, new_b=mat_b, t=q_opt, s=None)
def orthogonal_2sided(
    a: np.ndarray,
    b: np.ndarray,
    single: bool = True,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
    lapack_driver: str = "gesvd",
) -> ProcrustesResult:
    r"""Perform two-sided orthogonal Procrustes with one or two transformations.

    **Two Transformations:** Given a matrix :math:`\mathbf{A}_{m \times n}` and a reference
    matrix :math:`\mathbf{B}_{m \times n}`, find an :math:`m \times m` orthogonal matrix
    :math:`\mathbf{Q}_1` and an :math:`n \times n` orthogonal matrix :math:`\mathbf{Q}_2`
    that make :math:`\mathbf{Q}_1^\dagger\mathbf{A}\mathbf{Q}_2` as close as possible to
    :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\text{min}}_{\left\{ {\mathbf{Q}_1 \atop \mathbf{Q}_2} \left|
         {\mathbf{Q}_1^{-1} = \mathbf{Q}_1^\dagger \atop \mathbf{Q}_2^{-1} =
         \mathbf{Q}_2^\dagger} \right. \right\}}
         \|\mathbf{Q}_1^\dagger \mathbf{A} \mathbf{Q}_2 - \mathbf{B}\|_{F}^2

    **Single Transformation:** Given a **symmetric** matrix :math:`\mathbf{A}_{n \times n}`
    and a symmetric reference :math:`\mathbf{B}_{n \times n}`, find one orthogonal matrix
    :math:`\mathbf{Q}_{n \times n}` that makes :math:`\mathbf{Q}^\dagger\mathbf{A}\mathbf{Q}`
    as close as possible to :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{Q} | \mathbf{Q}^{-1} = {\mathbf{Q}}^\dagger \right\}}
          \|\mathbf{Q}^\dagger\mathbf{A}\mathbf{Q} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape, which is guaranteed with the default ``pad`` argument for any given
    :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. In preparing the :math:`\mathbf{A}` and
    :math:`\mathbf{B}` matrices, the (optional) order of operations is: **1)** unpad zero
    rows/columns, **2)** translate the matrices to the origin, **3)** weight entries of
    :math:`\mathbf{A}`, **4)** scale the matrices to have unit norm, **5)** pad matrices with
    zero rows/columns so they have the same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    single : bool, optional
        If True, a single transformation is used (i.e.,
        :math:`\mathbf{Q}_1=\mathbf{Q}_2=\mathbf{Q}`); otherwise, two transformations are used.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the
        initial :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines
        the elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by the
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {"gesvd", "gesdd"}, optional
        LAPACK driver handed to `scipy.linalg.svd` in the two-transformation case. "gesvd"
        is less efficient than "gesdd" but more robust. It is not used when ``single=True``.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object. Its
        ``t`` entry holds the right-hand-side transformation and its ``s`` entry the
        left-hand-side one: (:math:`\mathbf{Q}`, :math:`\mathbf{Q}^\dagger`) for a single
        transformation and (:math:`\mathbf{Q}_2`, :math:`\mathbf{Q}_1^\dagger`) for two.

    Raises
    ------
    ValueError
        If ``single=True`` and the prepared :math:`\mathbf{A}` or :math:`\mathbf{B}` is not
        symmetric.

    Notes
    -----
    **Two-Sided Orthogonal Procrustes with Two Transformations:**
    The problem is solved with the singular value decompositions (SVD) of
    :math:`\mathbf{A}` and :math:`\mathbf{B}`,

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Sigma}_A \mathbf{V}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Sigma}_B \mathbf{V}_B^\dagger

    from which the two optimal orthogonal matrices are

    .. math::
       \mathbf{Q}_1^{\text{opt}} = \mathbf{U}_A \mathbf{U}_B^\dagger \\
       \mathbf{Q}_2^{\text{opt}} = \mathbf{V}_A \mathbf{V}_B^\dagger

    **Two-Sided Orthogonal Procrustes with Single Transformation:**
    The problem is solved with the eigenvalue decompositions of the symmetric matrices
    :math:`\mathbf{A}` and :math:`\mathbf{B}`,

    .. math::
       \mathbf{A} = \mathbf{U}_A \mathbf{\Lambda}_A \mathbf{U}_A^\dagger \\
       \mathbf{B} = \mathbf{U}_B \mathbf{\Lambda}_B \mathbf{U}_B^\dagger

    The optimal orthogonal matrix is

    .. math::
       \mathbf{Q}^\text{opt} = \mathbf{U}_A \mathbf{S} \mathbf{U}_B^\dagger

    where :math:`\mathbf{S}` is a diagonal matrix with :math:`\pm{1}` elements; this
    implementation chooses :math:`\mathbf{S}` to be the identity matrix.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[30, 33, 20], [33, 53, 43], [20, 43, 46]])
    >>> b = np.array([[ 22.78131838, -0.58896768,-43.00635291, 0., 0.],
    ...               [ -0.58896768, 16.77132475,  0.24289990, 0., 0.],
    ...               [-43.00635291,  0.2428999 , 89.44735687, 0., 0.],
    ...               [  0.        ,  0.        ,  0.        , 0., 0.]])
    >>> res = orthogonal_2sided(a, b, single=True, pad=True, unpad_col=True)
    >>> res.t
    array([[ 0.25116633,  0.76371527,  0.59468855],
           [-0.95144277,  0.08183302,  0.29674906],
           [ 0.17796663, -0.64034549,  0.74718507]])
    >>> res.error
    1.9646186414076689e-26

    """
    # NOTE: translation and two-sided rotation do not commute, so the translation is not
    # well defined for this method; a warning for translate=True exists only as a
    # disabled draft and no warning is emitted.

    # prepare the input matrices
    new_a, new_b = setup_input_arrays(
        a, b, unpad_col, unpad_row, pad, translate, scale, check_finite, weight
    )

    if not single:
        # two transformations: align the singular vectors of A with those of B
        left_a, _, right_a_t = scipy.linalg.svd(new_a, lapack_driver=lapack_driver)
        left_b, _, right_b_t = scipy.linalg.svd(new_b, lapack_driver=lapack_driver)
        q_left = left_a.dot(left_b.T)
        q_right = right_a_t.T.dot(right_b_t)
        error = compute_error(new_a, new_b, q_right, q_left.T)
        return ProcrustesResult(
            error=error, new_a=new_a, new_b=new_b, t=q_right, s=q_left.T
        )

    # single transformation: both prepared matrices must be symmetric
    if not np.allclose(new_a.T, new_a):
        raise ValueError(
            f"Array A with {new_a.shape} shape is not symmetric. "
            "Check pad, unpad_col, and unpad_row arguments."
        )
    if not np.allclose(new_b.T, new_b):
        raise ValueError(
            f"Array B with {new_b.shape} shape is not symmetric. "
            "Check pad, unpad_col, and unpad_row arguments."
        )

    # align the eigenvectors of A with those of B (S is taken to be the identity)
    _, vecs_a = np.linalg.eigh(new_a)
    _, vecs_b = np.linalg.eigh(new_b)
    q_opt = vecs_a.dot(vecs_b.T)
    error = compute_error(new_a, new_b, q_opt, q_opt.T)
    return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=q_opt, s=q_opt.T)
def rotational(array_a, array_b,
               remove_zero_col=True,
               remove_zero_row=True,
               pad_mode="row-col",
               translate=False,
               scale=False,
               check_finite=True,
               weight=None):
    r"""
    Compute the optimal rotational transformation array.

    The Procrustes analysis requires two 2d-arrays with the same number of rows, so the
    array with the smaller number of rows will automatically be padded with zero rows.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference array.
    remove_zero_col : bool, optional
        If True, zero columns (values less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, zero rows (values less than 1e-8) on the bottom will be removed.
        Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the
                same number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have
                the same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of the square array is specified based on the
                highest dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin, i.e. columns of the
        arrays will have mean zero. Default=False.
    scale : bool, optional
        If True, both arrays are normalized to one with respect to the Frobenius norm, i.e.
        :math:`Tr(A^T A) = 1`. Default=False.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs. Default=True.
    weight : ndarray
        The weighting matrix. Default=None.

    Returns
    -------
    res : ProcrustesResult
        Procrustes analysis result object.

    Attributes
    ----------
    new_a : ndarray
        The transformed ndarray :math:`A`.
    new_b : ndarray
        The transformed ndarray :math:`B`.
    array_u : ndarray
        The optimum rotational transformation matrix.
    error : float
        One-sided orthogonal Procrustes error.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference
    :math:`\mathbf{B}_{m \times n}`, find the rotation of :math:`\mathbf{A}` that makes it as
    close as possible to :math:`\mathbf{B}`. I.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{U} \left| {\mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                                \atop \left| \mathbf{U} \right| = 1} \right. \right\}}
          \|\mathbf{A}\mathbf{U} - \mathbf{B}\|_{F}^2
       = \underbrace{\max}_{\left\{\mathbf{U} \left| {\mathbf{U}^{-1} = {\mathbf{U}}^\dagger
                                \atop \left| \mathbf{U} \right| = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{U}^\dagger {\mathbf{A}}^\dagger \mathbf{B} \right]

    Here, :math:`\mathbf{U}_{n \times n}` is the rotation matrix (orthogonal with unit
    determinant). The solution is obtained from the singular value decomposition (SVD) of
    :math:`\mathbf{A}^\dagger \mathbf{B}`,

    .. math::
       \mathbf{A}^\dagger \mathbf{B} &= \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                          \tilde{\mathbf{V}}^{\dagger} \\
       \mathbf{U}_{\text{optimum}} &= \tilde{\mathbf{U}} \tilde{\mathbf{S}}
                                      \tilde{\mathbf{V}}^{\dagger}

    where :math:`\tilde{\mathbf{S}}_{n \times n}` is the identity matrix whose last diagonal
    element is replaced by the sign of the determinant of
    :math:`\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger`,

    .. math::
       \tilde{\mathbf{S}} = \text{diag}\left(1, \ldots, 1, \operatorname{sgn}
       \left(\left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right|\right)\right)

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[1.5, 7.4], [8.5, 4.5]])
    >>> array_b = np.array([[6.29325035,  4.17193001, 0., 0,],
    ...                     [9.19238816, -2.82842712, 0., 0.],
    ...                     [0.,          0.,         0., 0.]])
    >>> res = rotational(array_a, array_b, translate=False, scale=False)
    >>> res['array_u'] # rotational array
    array([[ 0.70710678, -0.70710678],
           [ 0.70710678,  0.70710678]])
    >>> res['error'] # error
    1.483808210011695e-17

    """
    # prepare the input arrays
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col, remove_zero_row,
                                      pad_mode, translate, scale, check_finite, weight)
    # SVD of A.T * B
    left, _, right_t = np.linalg.svd(new_a.T.dot(new_b))
    # S is the identity with its last diagonal entry set to sgn(det(U * V^t)), which
    # makes the determinant of the resulting transformation non-negative
    sign_fix = np.eye(new_a.shape[1])
    sign_fix[-1, -1] = np.sign(np.linalg.det(left.dot(right_t)))
    # optimum rotation matrix U * S * V^t
    u_opt = left.dot(sign_fix).dot(right_t)
    # single-sided error
    error = compute_error(new_a, new_b, u_opt)
    return ProcrustesResult(new_a=new_a, new_b=new_b, array_u=u_opt, error=error)
def kopt_heuristic_double(array_m, array_n, ref_error,
                          perm_p=None, perm_q=None,
                          kopt_k=3, kopt_tol=1.e-8):
    r"""
    K-opt heuristic local search for regular two-sided permutation Procrustes.

    Perform k-opt local search with every possible valid combination of the swapping
    mechanism for regular 2-sided permutation Procrustes. For every non-trivial reordering
    of ``kopt_k`` rows of ``perm_p``, all non-trivial reorderings of ``kopt_k`` rows of
    ``perm_q`` are tried first, and then the reordered ``perm_p`` itself is tried. A
    candidate is accepted whenever it strictly lowers the current error, and the search
    returns as soon as the error drops to ``kopt_tol`` or below.

    Parameters
    ----------
    array_m : ndarray
        The array to be permuted.
    array_n : ndarray
        The reference array.
    ref_error : float
        The reference error value, i.e., the error that candidates have to improve upon.
    perm_p : ndarray, optional
        The left permutation array which remains to be processed with k-opt local search.
        Default is the identity matrix with as many rows as ``array_m``.
    perm_q : ndarray, optional
        The right permutation array which remains to be processed with k-opt local search.
        Default is the identity matrix with as many rows as ``array_m``.
    kopt_k : int, optional
        Defines the order of k-opt heuristic local search. For example, kopt_k=3 leads to a
        local search of 3 items and kopt_k=2 only searches for two items locally. Must be
        at least 2. Default=3.
    kopt_tol : float, optional
        Tolerance value to check if k-opt heuristic converges. Default=1.e-8.

    Returns
    -------
    perm_kopt_p : ndarray
        The left permutation array after optimal heuristic search.
    perm_kopt_q : ndarray
        The right permutation array after optimal heuristic search.
    kopt_error : float
        The error distance of two arrays with the updated permutation array.

    Raises
    ------
    ValueError
        If ``kopt_k`` is less than 2.

    Notes
    -----
    Candidates are scored with ``compute_error(array_n, array_m, perm_p.T, perm_q)``.
    NOTE(review): ``array_n`` is passed in the first position and ``array_m`` in the
    second; confirm against ``compute_error`` that this matches the roles documented above.

    """
    if kopt_k < 2:
        raise ValueError("Kopt_k value must be an integer greater than or equal to 2.")
    # if perm_p is not specified, use the identity matrix as default
    if perm_p is None:
        perm_p = np.identity(np.shape(array_m)[0])
    # if perm_q is not specified, use the identity matrix as default
    if perm_q is None:
        perm_q = np.identity(np.shape(array_m)[0])

    num_row_left = perm_p.shape[0]
    num_row_right = perm_q.shape[0]
    kopt_error = ref_error
    # the left hand side permutation
    # pylint: disable=too-many-nested-blocks
    for comb_left in it.combinations(np.arange(num_row_left), r=kopt_k):
        for comb_perm_left in it.permutations(comb_left, r=kopt_k):
            # the identity reordering does not change anything
            if comb_perm_left == comb_left:
                continue
            perm_kopt_left = deepcopy(perm_p)
            # the right hand side permutation
            for comb_right in it.combinations(np.arange(num_row_right), r=kopt_k):
                for comb_perm_right in it.permutations(comb_right, r=kopt_k):
                    if comb_perm_right == comb_right:
                        continue
                    # reorder the selected rows of a copy of the current best perm_q
                    perm_kopt_right = deepcopy(perm_q)
                    perm_kopt_right[comb_right, :] = perm_kopt_right[comb_perm_right, :]
                    e_kopt_new_right = compute_error(array_n, array_m, perm_p.T,
                                                     perm_kopt_right)
                    if e_kopt_new_right < kopt_error:
                        perm_q = perm_kopt_right
                        kopt_error = e_kopt_new_right
                        # converged: leave all loops at once (a bare `break` would only
                        # exit the innermost loop and the search would carry on)
                        if kopt_error <= kopt_tol:
                            return perm_p, perm_q, kopt_error

            # reorder the selected rows of the copy of perm_p taken above
            perm_kopt_left[comb_left, :] = perm_kopt_left[comb_perm_left, :]
            e_kopt_new_left = compute_error(array_n, array_m, perm_kopt_left.T, perm_q)
            if e_kopt_new_left < kopt_error:
                perm_p = perm_kopt_left
                kopt_error = e_kopt_new_left
                if kopt_error <= kopt_tol:
                    return perm_p, perm_q, kopt_error

    return perm_p, perm_q, kopt_error
def generic(
    a: np.ndarray,
    b: np.ndarray,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
    use_svd: bool = False,
) -> ProcrustesResult:
    r"""Perform generic one-sided Procrustes.

    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix
    :math:`\mathbf{B}_{m \times n}`, find the transformation matrix
    :math:`\mathbf{T}_{n \times n}` that makes :math:`\mathbf{AT}` as close as possible to
    :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\text{min}}_{\mathbf{T}} \quad \|\mathbf{A} \mathbf{T} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape, which is guaranteed with the default ``pad`` argument for any given
    :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. In preparing the :math:`\mathbf{A}` and
    :math:`\mathbf{B}` matrices, the (optional) order of operations is: **1)** unpad zero
    rows/columns, **2)** translate the matrices to the origin, **3)** weight entries of
    :math:`\mathbf{A}`, **4)** scale the matrices to have unit norm, **5)** pad matrices with
    zero rows/columns so they have the same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the
        initial :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines
        the elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by the
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    use_svd : bool, optional
        If True, the (Moore-Penrose) pseudo-inverse of :math:`\mathbf{A}^\top\mathbf{A}` is
        computed by singular-value decomposition (SVD) using `numpy.linalg.pinv`. If False,
        it is computed using `scipy.linalg.pinv`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object.

    Raises
    ------
    TypeError
        If ``use_svd`` is not a bool.

    Notes
    -----
    The optimal transformation matrix is obtained by solving the least-squares equations,

    .. math::
       \mathbf{X}_\text{opt} = {(\mathbf{A}^{\top}\mathbf{A})}^{-1} \mathbf{A}^{\top} \mathbf{B}

    where the inverse is replaced by the pseudo-inverse. If :math:`m < n`, the transformation
    matrix :math:`\mathbf{T}_\text{opt}` is not unique, because the system of equations is
    underdetermined (i.e., there are fewer equations than unknowns).

    The SVD branch previously relied on `scipy.linalg.pinv2`, which was deprecated in
    SciPy 1.7 and removed in SciPy 1.9; `numpy.linalg.pinv` is used instead so that
    ``use_svd=True`` works with every SciPy release.

    """
    if not isinstance(use_svd, bool):
        raise TypeError(
            f"The use_svd parameter {type(use_svd)} should be type bool.")

    # check inputs
    new_a, new_b = setup_input_arrays(
        a,
        b,
        unpad_col,
        unpad_row,
        pad,
        translate,
        scale,
        check_finite,
        weight,
    )
    # Gram matrix A.T * A whose pseudo-inverse enters the normal equations
    gram = np.dot(new_a.T, new_a)
    if use_svd:
        # SVD-based pseudo-inverse (NumPy); does not depend on the removed pinv2
        a_inv = np.linalg.pinv(gram)
    else:
        # SciPy pseudo-inverse
        a_inv = pinv(gram)
    # X = pinv(A.T A) A.T B
    array_x = np.linalg.multi_dot([a_inv, new_a.T, new_b])
    # compute one-sided error
    e_opt = compute_error(new_a, new_b, array_x)
    return ProcrustesResult(error=e_opt, new_a=new_a, new_b=new_b, t=array_x, s=None)
def softassign(
    a: np.ndarray,
    b: np.ndarray,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
    iteration_soft: int = 50,
    iteration_sink: int = 200,
    beta_r: float = 1.10,
    beta_f: Optional[float] = 1.0e5,
    epsilon: float = 0.05,
    epsilon_soft: float = 1.0e-3,
    epsilon_sink: float = 1.0e-3,
    k: float = 0.15,
    gamma_scaler: float = 1.01,
    n_stop: int = 3,
    adapted: bool = True,
    beta_0: Optional[float] = None,
    m_guess: Optional[np.ndarray] = None,
    iteration_anneal: Optional[int] = None,
    kopt: bool = False,
    kopt_k: int = 3,
) -> ProcrustesResult:
    r"""
    Find the transformation matrix for 2-sided permutation Procrustes with softassign algorithm.

    The prepared matrices must be square and of equal shape. A doubly-stochastic matrix is
    refined by deterministic annealing (outer loop over the inverse temperature), with a
    softassign relaxation loop and a Sinkhorn row/column normalization loop inside, and is
    finally converted into a permutation matrix.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the
        initial :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs. Default=True.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines
        the elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by the
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    iteration_soft : int, optional
        Number of iterations for softassign loop.
    iteration_sink : int, optional
        Number of iterations for Sinkhorn loop.
    beta_r : float, optional
        Annealing rate, which must be greater than 1.
    beta_f : float, optional
        The final inverse temperature. It is multiplied by the matrix dimension before use
        and is ignored (recomputed) when `iteration_anneal` is given.
    epsilon : float, optional
        The tolerance value for annealing loop.
    epsilon_soft : float, optional
        The tolerance value used for softassign.
    epsilon_sink : float, optional
        The tolerance value used for Sinkhorn loop. If the adapted version is used, the
        adapted tolerance value is used for Sinkhorn instead.
    k : float, optional
        This parameter controls how much tighter the convergence threshold for the interior
        loop should be than the convergence threshold for the loops outside. It should lie
        within the interval :math:`(0,1)`.
    gamma_scaler : float, optional
        Scaling factor for the self-amplification term, intended to keep the quadratic cost
        function (including self-amplification) positive definite. It also scales the
        default initial inverse temperature.
    n_stop : int, optional
        Number of consecutive annealing steps without significant change (below `epsilon`)
        that are tolerated before the annealing loop stops.
    adapted : bool, optional
        If True, tighter (adaptive) convergence thresholds are used for the interior loops.
    beta_0 : float, optional
        Initial inverse temperature. If given, it is multiplied by the matrix dimension;
        if None, it is derived from the largest absolute eigenvalue of the benefit matrix
        including the self-amplification term.
    m_guess : ndarray, optional
        The initial guess of the doubly-stochastic matrix. It must not contain negative
        values. If its shape does not match, a warning is issued and a random guess is used.
    iteration_anneal : int, optional
        Number of iterations for annealing loop. If given, it determines the final inverse
        temperature and overrides `beta_f`.
    kopt : bool, optional
        If True, the k_opt heuristic search will be performed.
    kopt_k : int, optional
        Defines the order of k-opt heuristic local search. For example, kopt_k=3 leads to a
        local search of 3 items and kopt_k=2 only searches for two items locally.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object.

    Raises
    ------
    ValueError
        If `beta_r` is not greater than 1, if the prepared matrices are not square or do not
        have the same shape, if both `iteration_anneal` and `beta_f` are None, or if
        `m_guess` contains negative values.

    Notes
    -----
    Quadratic assignment problem (QAP) has played a very special but fundamental role in
    combinatorial optimization problems. The problem can be defined as an optimization
    problem to minimize the cost to assign a set of facilities to a set of locations. The
    cost is a function of the flow between the facilities and the geographical distances
    among various facilities.

    The objective function (also named loss function in machine learning) is defined as

    .. math::
        E_{qap}(M, \mu, \nu) =
            - \frac{1}{2}\Sigma_{aibj}C_{ai;bj}M_{ai}M_{bj}
            + \Sigma_{a}{\mu}_a (\Sigma_i M_{ai} -1) \\
            + \Sigma_i {\nu}_i (\Sigma_a M_{ai} -1)
            - \frac{\gamma}{2}\Sigma_{ai} {M_{ai}}^2
            + \frac{1}{\beta} \Sigma_{ai} M_{ai}\log{M_{ai}}

    where :math:`C_{ai,bj}` is the benefit matrix, :math:`M` is the desired
    :math:`N \times N` permutation matrix. :math:`E` is the energy function which comes along
    with a self-amplification term with :math:`\gamma`, two Lagrange parameters :math:`\mu`
    and :math:`\nu` for constrained optimization, and :math:`M_{ai} \log{M_{ai}}` serves as a
    barrier function which ensures positivity of :math:`M_{ai}`. The inverse temperature
    :math:`\beta` is a deterministic annealing control parameter.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[4, 5, 3, 3], [5, 7, 3, 5],
    ...               [3, 3, 2, 2], [3, 5, 2, 5]])    # define a random matrix
    >>> perm = np.array([[0., 0., 1., 0.], [1., 0., 0., 0.],
    ...                  [0., 0., 0., 1.], [0., 1., 0., 0.]])
    >>> b = np.dot(perm.T, np.dot(a, perm))    # define b by permuting a
    >>> res = softassign(a, b, unpad_col=False, unpad_row=False)
    >>> res["t"]    # the permutation matrix
    array([[0., 0., 1., 0.],
           [1., 0., 0., 0.],
           [0., 0., 0., 1.],
           [0., 1., 0., 0.]])
    >>> res["error"]    # the error
    0.0

    """
    # pylint: disable-msg=too-many-arguments
    # pylint: disable-msg=too-many-branches
    # todo: add linear_cost_func with default value 0
    # The annealing rate must be strictly greater than 1 (beta_r == 1 is rejected too),
    # otherwise beta would never grow towards beta_f.
    if beta_r <= 1:
        raise ValueError("Argument beta_r cannot be less than 1.")

    new_a, new_b = setup_input_arrays(a, b, unpad_col, unpad_row, pad, translate, scale,
                                      check_finite, weight)

    # Check that A & B are square and that they match each other.
    if new_a.shape[0] != new_a.shape[1]:
        raise ValueError(f"Matrix A should be square but A.shape={new_a.shape}"
                         "Check pad, unpad_col, and unpad_row arguments.")
    if new_b.shape[0] != new_b.shape[1]:
        raise ValueError(f"Matrix B should be square but B.shape={new_b.shape}"
                         "Check pad, unpad_col, and unpad_row arguments.")
    if new_a.shape != new_b.shape:
        raise ValueError(f"New matrix A {new_a.shape} should match the new"
                         f" matrix B shape {new_b.shape}.")

    # Initialization
    # Compute the benefit matrix as the Kronecker product of A and B
    array_c = np.kron(new_a, new_b)
    # Get the shape of A (B and the permutation matrix as well)
    row_num = new_a.shape[0]
    # View the (N*N, N*N) benefit matrix as a rank-4 tensor for the einsum contraction below
    c_tensor = array_c.reshape(row_num, row_num, row_num, row_num)
    # Compute the beta_0
    gamma = _compute_gamma(array_c, row_num, gamma_scaler)
    if beta_0 is None:
        # Default: inverse of the (scaled) largest absolute eigenvalue of C + gamma * I,
        # with a floor of 1.0e-10 to avoid division by zero
        c_gamma = array_c + gamma * (np.identity(row_num * row_num))
        eival_gamma = np.amax(np.abs(np.linalg.eigvalsh(c_gamma)))
        beta_0 = gamma_scaler * max(1.0e-10, eival_gamma / row_num)
        beta_0 = 1 / beta_0
    else:
        # A user-supplied value is scaled by the matrix dimension
        beta_0 *= row_num
    beta = beta_0

    # We will use iteration_anneal if provided even if the final inverse temperature is specified
    # iteration_anneal is not None, beta_f can be None or not
    if iteration_anneal is not None:
        # beta_0 here is the value computed above (already scaled if it was user-supplied)
        beta_f = beta_0 * np.power(beta_r, iteration_anneal) * row_num
    # iteration_anneal is None and beta_f is not None
    elif iteration_anneal is None and beta_f is not None:
        beta_f *= row_num
    # Both iteration_anneal and beta_f are None
    else:
        raise ValueError(
            "We must specify at least one of iteration_anneal and beta_f and "
            "specify only one is recommended.")

    # Initialization of m_ai
    # check shape of m_guess
    if m_guess is not None:
        if np.any(m_guess < 0):
            raise ValueError(
                "The initial guess of permutation matrix cannot contain any negative values."
            )
        if m_guess.shape[0] == row_num and m_guess.shape[1] == row_num:
            array_m = m_guess
        else:
            # Wrong shape: fall back to a random guess instead of failing
            warnings.warn(
                f"The shape of m_guess does not match ({row_num}, {row_num})."
                "Use random initial guess instead.")
            array_m = np.abs(
                np.random.normal(loc=1.0, scale=0.1, size=(row_num, row_num)))
    else:
        # m_relax_old = 1 / N + np.random.rand(N, N)
        array_m = np.abs(
            np.random.normal(loc=1.0, scale=0.1, size=(row_num, row_num)))
    # Entries are already non-negative at this point (validated guess or np.abs), so this
    # clamp only acts as a safeguard
    array_m[array_m < 0] = 0
    array_m = array_m / row_num

    # Counter of consecutive annealing steps with a change below epsilon
    nochange = 0
    if adapted:
        # Adapted version: the Sinkhorn tolerance is tied to the softassign tolerance
        epsilon_sink = epsilon_soft * k

    # Deterministic annealing: raise the inverse temperature until beta_f is reached
    while beta < beta_f:
        # relaxation
        m_old_beta = deepcopy(array_m)
        # softassign loop
        for _ in np.arange(iteration_soft):
            m_old_soft = deepcopy(array_m)
            # Compute Z in relaxation step
            # C_gamma_tensor = C_gamma.reshape(N, N, N, N)
            # Z = -np.einsum('ijkl,jl->ik', C_gamma_tensor, M)
            # Z -= linear_cost_func
            array_z = np.einsum("aibj,bj->ai", c_tensor, array_m)
            # self-amplification term
            array_z += gamma * array_m
            # soft_assign
            array_m = np.exp(beta * array_z)
            # Sinkhorn loop: alternate row and column normalization
            for _ in np.arange(iteration_sink):
                # Row normalization
                array_m = array_m / array_m.sum(axis=1, keepdims=1)
                # Column normalization
                array_m = array_m / array_m.sum(axis=0, keepdims=1)
                # Converged once every row sum is within epsilon_sink of 1; do a final
                # row normalization before leaving the loop
                if np.amax(np.abs(array_m.sum(axis=1, keepdims=1) - 1)) < epsilon_sink:
                    array_m = array_m / array_m.sum(axis=1, keepdims=1)
                    break

            # Largest element-wise change over this softassign iteration
            change_soft = np.amax(np.abs(array_m - m_old_soft))
            # pylint: disable-msg=no-else-break
            if change_soft < epsilon_soft:
                break
            else:
                if adapted:
                    # Tighten the Sinkhorn tolerance relative to the latest change
                    epsilon_sink = change_soft * k
                else:
                    continue

        # Largest element-wise change over this annealing step
        change_annealing = np.amax(np.abs(array_m - m_old_beta))
        if change_annealing < epsilon:
            nochange += 1
            # Stop after more than n_stop consecutive steps without significant change
            if nochange > n_stop:
                break
        else:
            nochange = 0

        beta *= beta_r
        if adapted:
            # NOTE(review): change_soft is only defined if the softassign loop ran at least
            # once, i.e. iteration_soft > 0 -- confirm callers never pass 0
            epsilon_soft = change_soft * k
            epsilon_sink = epsilon_soft * k

    # Turn the relaxed matrix into the final transformation; presumably `permutation` is
    # the one-sided permutation Procrustes solver, here applied against the identity matrix
    array_m = permutation(np.eye(array_m.shape[0]), array_m)["t"]
    # k-opt heuristic
    if kopt:
        # Error of a candidate p used as right transformation with p.T on the left
        fun_error = lambda p: compute_error(new_a, new_b, p, p.T)
        array_m, error = kopt_heuristic_single(fun_error, p0=array_m, k=kopt_k)
    else:
        error = compute_error(new_a, new_b, array_m, array_m.T)
    return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=array_m, s=None)
def permutation_2sided_explicit(array_a, array_b,
                                remove_zero_col=True,
                                remove_zero_row=True,
                                pad_mode="row-col",
                                translate=False,
                                scale=False,
                                check_finite=True,
                                weight=None):
    r"""
    Two-sided permutation Procrustes by explicit (brute-force) enumeration.

    Parameters
    ----------
    array_a : ndarray
        The 2d-array :math:`\mathbf{A}_{m \times n}` which is going to be transformed.
    array_b : ndarray
        The 2d-array :math:`\mathbf{B}_{m \times n}` representing the reference.
    remove_zero_col : bool, optional
        If True, near zero columns (less than 1e-8) on the right side will be removed.
        Default=True.
    remove_zero_row : bool, optional
        If True, near zero rows (less than 1e-8) on the bottom will be removed. Default=True.
    pad_mode : str, optional
        Specifying how to pad the arrays, listed below. Default="row-col".

            - "row"
                The array with fewer rows is padded with zero rows so that both have the same
                number of rows.
            - "col"
                The array with fewer columns is padded with zero columns so that both have the
                same number of columns.
            - "row-col"
                The array with fewer rows is padded with zero rows, and the array with fewer
                columns is padded with zero columns, so that both have the same dimensions.
                This does not necessarily result in square arrays.
            - "square"
                The arrays are padded with zero rows and zero columns so that they are both
                squared arrays. The dimension of square array is specified based on the highest
                dimension, i.e. :math:`\text{max}(n_a, m_a, n_b, m_b)`.
    translate : bool, optional
        If True, both arrays are translated to be centered at origin. Default=False.
    scale : bool, optional
        If True, both arrays are column normalized to unity. Default=False.
    check_finite : bool, optional
        If true, convert the input to an array, checking for NaNs or Infs. Default=True.
    weight : ndarray
        The weighting matrix. Default=None.

    Returns
    -------
    res : ProcrustesResult
        Procrustes analysis result object.

    Attributes
    ----------
    new_a : ndarray
        The transformed ndarray A.
    new_b : ndarray
        The transformed ndarray B.
    array_u : ndarray
        The optimum permutation transformation matrix.
    error : float
        Two-sided permutation Procrustes error.

    Raises
    ------
    ValueError
        If the processed array A is not square. A single permutation matrix is applied on both
        sides, so the number of rows and columns must agree.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference :math:`\mathbf{B}_{n \times n}`,
    find a permutation of rows/columns of :math:`\mathbf{A}_{n \times n}` that makes it as close
    as possible to :math:`\mathbf{B}_{n \times n}`. Be careful: this is a brute-force search
    that loops over all :math:`n!` permutation matrices and returns the one that gives the
    minimum error (distance). This method can be used as a checker for small datasets.

    """
    print("Warning: This brute-strength method is computational expensive! \n"
          "But it can be used as a checker for a small dataset.")
    # check inputs
    new_a, new_b = setup_input_arrays(array_a, array_b, remove_zero_col, remove_zero_row,
                                      pad_mode, translate, scale, check_finite, weight)

    # The same permutation matrix is passed for both transformations, so the processed A must be
    # square; otherwise the fancy-index assignment below fails with a cryptic indexing error.
    shape = np.shape(new_a)
    if shape[0] != shape[1]:
        raise ValueError(
            f"Matrix A should be square but A.shape={shape}. "
            "Check pad_mode, remove_zero_col, and remove_zero_row arguments.")

    # loop invariants: matrix size and the row indices used to place the ones
    size = shape[1]
    rows = np.arange(size)

    perm1 = np.zeros(shape)
    perm_error1 = np.inf
    for comb in it.permutations(rows):
        # build the permutation matrix that has a one at (i, comb[i]) for every row i
        perm2 = np.zeros((size, size))
        perm2[rows, comb] = 1
        perm_error2 = compute_error(new_a, new_b, perm2, perm2)
        # strict comparison keeps the first permutation found among ties
        if perm_error2 < perm_error1:
            perm_error1 = perm_error2
            perm1 = perm2
    return ProcrustesResult(new_a=new_a, new_b=new_b, array_u=perm1, error=perm_error1)
def permutation_2sided(
    a,
    b,
    single=True,
    method="kopt",
    guess_p1=None,
    guess_p2=None,
    pad=False,
    unpad_col=False,
    unpad_row=False,
    translate=False,
    scale=False,
    check_finite=True,
    options=None,
    weight=None,
    lapack_driver="gesvd",
):
    r"""Perform two-sided permutation Procrustes.

    Parameters
    ----------
    a : ndarray
        The 2d-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2d-array :math:`\mathbf{B}` representing the reference matrix.
    single : bool, optional
        If `True`, the single-transformation Procrustes is performed to obtain :math:`\mathbf{P}`.
        If `False`, the two-transformations Procrustes is performed to obtain
        :math:`\mathbf{P}_1` and :math:`\mathbf{P}_2`.
    method : str, optional
        The method to solve for permutation matrices. For `single=False`, these include
        "flip-flop" and "k-opt" methods. For `single=True`, these include "approx-normal1",
        "approx-normal2", "approx-umeyama", "approx-umeyama-svd", "k-opt", "soft-assign", and
        "nmf". The spelling "kopt" (the default value) is accepted as an alias of "k-opt".
        The "soft-assign" method is not implemented here and raises `NotImplementedError`.
    guess_p1 : np.ndarray, optional
        Guess for :math:`\mathbf{P}_1` matrix given as a 2D-array. This is only used for the
        two-transformations case specified by setting `single=False`; it must be `None` when
        `single=True`. If not given for `single=False`, the identity matrix is used.
    guess_p2 : np.ndarray, optional
        Guess for :math:`\mathbf{P}_2` matrix given as a 2D-array. If not given, the identity
        matrix is used.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom are removed.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    options : dict, optional
        A dictionary of method options. The recognized keys (and their defaults) are
        "tol" (1.0e-8), "maxiter" (500), and "k" (3).
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
        matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {'gesvd', 'gesdd'}, optional
        Whether to use the more efficient divide-and-conquer approach ('gesdd') or the more robust
        general rectangular approach ('gesvd') to compute the singular-value decomposition with
        `scipy.linalg.svd`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a class:`utils.ProcrustesResult` object.

    Raises
    ------
    TypeError
        If `single` is not a boolean.
    ValueError
        If the processed matrices are not square for `single=True`, if `guess_p1` is given for
        `single=True`, if a guess has the wrong shape, if `options` is not a dictionary or
        contains unknown keys, or if `method` is not supported for the chosen `single`.
    NotImplementedError
        If `method="soft-assign"` is requested for `single=True`.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference :math:`\mathbf{B}_{n \times n}`,
    find a permutation of rows/columns of :math:`\mathbf{A}_{n \times n}` that makes it as close
    as possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
        &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \|\mathbf{P}^\dagger \mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2\\
        = &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)^\dagger
             \left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)\right] \\
        = &\underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{P}\mathbf{B} \right]\\

    Here, :math:`\mathbf{P}_{n \times n}` is the permutation matrix. Given an initial guess, the
    best local minimum can be obtained by the iterative procedure,

    .. math::
       p_{ij}^{(n + 1)} = p_{ij}^{(n)} \sqrt{ \frac{2\left[\mathbf{T}^{(n)}\right]_{ij}}{\left[
                          \mathbf{P}^{(n)} \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} +
                          \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} \right)^T \right)
                          \right]_{ij}} }

    where,

    .. math::
       \mathbf{T}^{(n)} = \mathbf{A} \mathbf{P}^{(n)} \mathbf{B}

    Using an initial guess, the iteration stops when the change in :math:`d` is below the
    specified threshold,

    .. math::
       d = \text{Tr} \left[\left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)^T
                           \left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)\right]

    The outcome of the iterative procedure :math:`\mathbf{P}^{(\infty)}` is not a permutation
    matrix. So, the closest permutation can be found by setting ``refinement=True``. This uses
    :class:`procrustes.permutation.PermutationProcrustes` to find the closest permutation; that is,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
            \|\mathbf{P} - \mathbf{P}^{(\infty)}\|_{F}^2
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
            \text{Tr}\left[\mathbf{P}^\dagger\mathbf{P}^{(\infty)} \right]

    The answer to this problem is a heuristic solution for the matrix-matching problem that seems
    to be relatively accurate.

    **Initial Guess:**

    Two possible initial guesses are inferred from the Umeyama procedure. One can find either the
    closest permutation matrix to :math:`\mathbf{U}_\text{Umeyama}` or to
    :math:`\mathbf{U}_\text{Umeyama}^\text{approx.}`.

    Considering the :class:`procrustes.permutation.PermutationProcrustes`, the resulting
    permutation matrix can be specified as initial guess through ``guess=umeyama`` and
    ``guess=umeyama_approx``, which solves:

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama} \right] \\
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama}^\text{approx.} \right]

    Another choice is to start by solving a normal permutation Procrustes problem. In other words,
    write new matrices, :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0`, with columns like,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    Here, :math:`\text{max}-1` denotes the second-largest absolute value of elements,
    :math:`\text{max}-2` is the third-largest absolute value of elements, etc.

    The matrices :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` have the diagonal elements of
    :math:`\mathbf{A}` and :math:`\mathbf{B}` in the first row, and below the first row has the
    largest off-diagonal element in row :math:`i`, the second-largest off-diagonal element, etc.
    The elements are weighted by a factor :math:`0 < p < 1`, so that the smaller elements are
    considered less important for matching. The matrices can be truncated after a few terms; for
    example, after the size of elements falls below some threshold. A reasonable choice would be
    to stop after :math:`\lfloor \frac{-2\ln 10}{\ln p} +1\rfloor` rows; this ensures that the
    size of the elements in the last row is less than 1% of those in the first off-diagonal row.

    There are obviously many different ways to construct the matrices :math:`\mathbf{A}^0` and
    :math:`\mathbf{B}^0`. Another, even better, method would be to try to encode not only what the
    off-diagonal elements are, but which element in the matrix they correspond to. One could do
    that by not only listing the diagonal elements, but also listing the associated off-diagonal
    element. I.e., the columns of :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` would be,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot a_{j_\text{max} j_\text{max}} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot a_{j_{\text{max}-1} j_{\text{max}-1}} \\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    In this case, you would stop the procedure after
    :math:`m = \left\lfloor {\frac{{ - 4\ln 10}}{{\ln p}} + 1} \right \rfloor` rows.

    Then one uses the :class:`procrustes.permutation.PermutationProcrustes` to match the
    constructed matrices :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` instead of
    :math:`\mathbf{A}` and :math:`\mathbf{B}`. I.e.,

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
             \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
             \text{Tr}\left[\mathbf{P}^\dagger \left(\mathbf{A^0}^\dagger\mathbf{B^0}\right)\right]

    Please note that the "umeyama_approx" might give an inaccurate permutation matrix. More
    specifically, this is an approximate Umeyama method. One example we can give is that when we
    compute the permutation matrix that transforms :math:`A` to :math:`B`, the "umeyama_approx"
    method can not give the exact permutation transformation matrix while "umeyama", "normal1"
    and "normal2" do.

    .. math::
        A =
        \begin{bmatrix}
         4 & 5 & -3 & 3 \\
         5 & 7 & 3 & -5 \\
         -3 & 3 & 2 & 2 \\
         3 & -5 & 2 & 5 \\
        \end{bmatrix} \\
        B =
        \begin{bmatrix}
         73 & 100 & 73 & -62 \\
         100 & 208 & -116 & 154 \\
         73 & -116 & 154 & 100 \\
         -62 & 154 & 100 & 127 \\
        \end{bmatrix} \\

    References
    ----------
    [1] C. Ding, T. Li and M. I. Jordan, "Nonnegative Matrix Factorization for Combinatorial
        Optimization: Spectral Clustering, Graph Matching, and Clique Finding," 2008 Eighth IEEE
        International Conference on Data Mining, Pisa, Italy, 2008, pp. 183-192,
        doi: 10.1109/ICDM.2008.130.
    [2] Papadimitriou, Pythagoras. "Parallel solution of SVD-related problems, with applications."
        PhD diss., University of Manchester, 1993.
    [3] S. Umeyama. An eigendecomposition approach to weighted graph matching problems.
        IEEE Trans. on Pattern Analysis and Machine Intelligence, 10:695-703, 1988.

    """
    # check single argument
    if not isinstance(single, bool):
        raise TypeError(
            f"Argument single is not a boolean! Given type={type(single)}")

    # The default value of `method` is spelled "kopt" whereas every branch below tests for
    # "k-opt"; treat the former as an alias so that a call with default arguments works.
    if method == "kopt":
        method = "k-opt"

    # check inputs
    new_a, new_b = setup_input_arrays(a, b, unpad_col, unpad_row, pad,
                                      translate, scale, check_finite, weight)

    # check that A & B are square in case of single transformation
    if single and new_a.shape[0] != new_a.shape[1]:
        raise ValueError(
            f"For single={single}, matrix A should be square but A.shape={new_a.shape}. "
            "Check pad, unpad_col, and unpad_row arguments.")
    if single and new_b.shape[0] != new_b.shape[1]:
        raise ValueError(
            f"For single={single}, matrix B should be square but B.shape={new_b.shape}. "
            "Check pad, unpad_col, and unpad_row arguments.")

    # print a statement if user-specified guess is not used
    if method.startswith("approx") and guess_p1 is not None:
        print(
            f"Method={method} does not use an initial guess, so guess_p1 is ignored!"
        )
    if method.startswith("approx") and guess_p2 is not None:
        print(
            f"Method={method} does not use an initial guess, so guess_p2 is ignored!"
        )

    # get the number of rows & columns of matrix A
    m, n = new_a.shape

    # assign & check initial guess for P1
    if single and guess_p1 is not None:
        raise ValueError(
            f"For single={single}, P1 is transpose of P2, so guess_p1 should be None."
        )
    if not single:
        if guess_p1 is None:
            guess_p1 = np.eye(m)
        if guess_p1.shape != (m, m):
            raise ValueError(
                f"Argument guess_p1 should be either None or a ({m}, {m}) array."
            )

    # assign & check initial guess for P2
    if guess_p2 is None:
        guess_p2 = np.eye(n)
    if guess_p2.shape != (n, n):
        raise ValueError(
            f"Argument guess_p2 should be either None or a ({n}, {n}) array.")

    # check options dictionary & assign default keys
    defaults = {"tol": 1.0e-8, "maxiter": 500, "k": 3}
    if options is not None:
        if not isinstance(options, dict):
            raise ValueError(
                f"Argument options should be a dictionary. Given type={type(options)}"
            )
        if not all(key in defaults for key in options):
            raise ValueError(
                f"Argument options should only have {defaults.keys()} keys. "
                f"Given options contains {options.keys()} keys!")
        # update defaults dictionary to use the specified options
        defaults.update(options)

    # 2-sided permutation Procrustes with two transformations
    # -------------------------------------------------------
    if not single:
        if method == "flip-flop":
            # compute permutations using flip-flop algorithm
            perm1, perm2, error = _permutation_2sided_2trans_flipflop(
                new_a, new_b, defaults["tol"], defaults["maxiter"], guess_p1,
                guess_p2)
        elif method == "k-opt":
            # compute permutations using k-opt heuristic search
            def fun_error(p1, p2):
                return compute_error(new_a, new_b, p2, p1.T)

            perm1, perm2, error = kopt_heuristic_double(fun_error,
                                                        p1=guess_p1,
                                                        p2=guess_p2,
                                                        k=defaults["k"])
        else:
            raise ValueError(
                f"Method={method} not supported for single={single} transformation!"
            )
        return ProcrustesResult(error=error,
                                new_a=new_a,
                                new_b=new_b,
                                t=perm2,
                                s=perm1)

    # 2-sided permutation Procrustes with one transformation
    # ------------------------------------------------------
    # The (un)directed iterative procedure for finding the permutation matrix takes the square
    # root of the matrix entries, which can result in complex numbers if the entries are
    # negative. To avoid this, all matrix entries are shifted (by the smallest amount) to be
    # positive. This causes no change to the objective function, as it's a constant value
    # being added to all entries of a and b.
    shift = 1.0e-6
    if np.min(new_a) < 0 or np.min(new_b) < 0:
        shift += abs(min(np.min(new_a), np.min(new_b)))
    # shift is a float, so even if new_a or new_b are ints, the positive matrices are floats
    # default shift is not zero to avoid division by zero later in the algorithm
    pos_a = new_a + shift
    pos_b = new_b + shift

    # NOTE: the approximate methods operate on the pre-processed arrays (new_a & new_b), so that
    # padding/translation/scaling/weighting is honoured and the permutation is consistent with
    # the arrays used to compute the final error.
    if method == "approx-normal1":
        tmp_a = _approx_permutation_2sided_1trans_normal1(new_a)
        tmp_b = _approx_permutation_2sided_1trans_normal1(new_b)
        perm = permutation(tmp_a, tmp_b).t
    elif method == "approx-normal2":
        tmp_a = _approx_permutation_2sided_1trans_normal2(new_a)
        tmp_b = _approx_permutation_2sided_1trans_normal2(new_b)
        perm = permutation(tmp_a, tmp_b).t
    elif method == "approx-umeyama":
        perm = _approx_permutation_2sided_1trans_umeyama(pos_a, pos_b)
    elif method == "approx-umeyama-svd":
        perm = _approx_permutation_2sided_1trans_umeyama_svd(
            new_a, new_b, lapack_driver)
    elif method == "k-opt":
        def fun_error(p):
            return compute_error(pos_a, pos_b, p, p.T)

        # the error returned here is recomputed below on the unshifted arrays
        perm, error = kopt_heuristic_single(fun_error,
                                            p0=guess_p2,
                                            k=defaults["k"])
    elif method == "soft-assign":
        raise NotImplementedError
    elif method == "nmf":
        # check whether A & B are symmetric (within a relative & absolute tolerance)
        is_pos_a_symmetric = np.allclose(pos_a,
                                         pos_a.T,
                                         rtol=1.0e-05,
                                         atol=1.0e-08)
        is_pos_b_symmetric = np.allclose(pos_b,
                                         pos_b.T,
                                         rtol=1.0e-05,
                                         atol=1.0e-08)
        if is_pos_a_symmetric and is_pos_b_symmetric:
            # undirected graph matching problem (iterative procedure)
            perm = _permutation_2sided_1trans_undirected(
                pos_a, pos_b, guess_p2, defaults['tol'], defaults['maxiter'])
        else:
            # directed graph matching problem (iterative procedure)
            perm = _permutation_2sided_1trans_directed(pos_a, pos_b, guess_p2,
                                                       defaults['tol'],
                                                       defaults['maxiter'])
    else:
        raise ValueError(
            f"Method={method} not supported for single={single} transformation!"
        )

    # some of the methods for 2-sided-1-transformation permutation procrustes does not produce a
    # permutation matrix. So, their output is treated like a guess, and the closest permutation
    # matrix is found using 1-sided permutation procrustes (where A=I & B=perm)
    # Even though this step is not needed for ALL methods (e.g. k-opt, normal1, & normal2), to
    # make the code simple, this step is performed for all methods as its cost is negligible.
    perm = permutation(
        np.eye(perm.shape[0]),
        perm,
        translate=False,
        scale=False,
        unpad_col=False,
        unpad_row=False,
        check_finite=True,
    ).t

    # compute error
    error = compute_error(new_a, new_b, t=perm, s=perm.T)
    return ProcrustesResult(error=error,
                            new_a=new_a,
                            new_b=new_b,
                            t=perm,
                            s=perm.T)
def _2sided_regular(array_m, array_n, tol, iteration):
    r"""Regular two-sided permutation Procrustes by the flip-flop procedure.

    Targets :math:`{\(\vert M-PNQ \vert\)}^2_F`; taken from page 64 in "Parallel solution of
    SVD-related problems, with applications", Pythagoras Papadimitriou, University of
    Manchester, 1993.

    Two alternating searches are run, one starting from P = I and one starting from Q = I, and
    the pair with the lower error is returned.

    Parameters
    ----------
    array_m : ndarray
        The reference 2d-array M.
    array_n : ndarray
        The 2d-array N which is going to be transformed.
    tol : float
        An alternating search stops as soon as its error is not larger than this value.
    iteration : int
        Maximum number of update steps of each alternating search.

    Returns
    -------
    array_p : ndarray
        The left-hand-side transformation of the better search.
    array_q : ndarray
        The right-hand-side transformation of the better search.
    error : float
        The error (as returned by ``compute_error``) of the returned pair.

    """
    # NOTE(review): _2sided_hungarian & compute_error are defined elsewhere in this module; the
    # argument order used here (P transposed as 3rd argument, Q as 4th) is kept as-is.

    # Case 1: fix P = I first
    # Initial guess for P
    array_p1 = np.eye(array_m.shape[0], array_m.shape[0])
    # Initial guess for Q
    array_q1 = _2sided_hungarian(np.dot(array_n.T, array_m))
    error1 = compute_error(array_n, array_m, array_p1.T, array_q1)
    step1 = 0

    # while loop for the original algorithm
    while error1 > tol and step1 < iteration:
        step1 += 1
        # Update P
        array_p1 = _2sided_hungarian(
            np.dot(np.dot(array_n, array_q1), array_m.T))
        array_p1 = np.transpose(array_p1)
        # Update the error
        error1 = compute_error(array_n, array_m, array_p1.T, array_q1)
        if error1 > tol:
            # Update Q
            array_q1 = _2sided_hungarian(
                np.dot(np.dot(array_n.T, array_p1.T), array_m))
            # Update the error
            error1 = compute_error(array_n, array_m, array_p1.T, array_q1)
        else:
            break

    if step1 == iteration:
        print(f"Maximum iteration reached in the first case! Error={error1}")

    # Case 2: fix Q = I first
    # Initial guess for Q
    array_q2 = np.eye(array_m.shape[1], array_m.shape[1])
    # Initial guess for P
    array_p2 = _2sided_hungarian(np.dot(array_n, array_m.T))
    array_p2 = np.transpose(array_p2)
    error2 = compute_error(array_n, array_m, array_p2.T, array_q2)
    step2 = 0

    # while loop for the original algorithm
    while error2 > tol and step2 < iteration:
        # Update Q
        array_q2 = _2sided_hungarian(
            np.dot(np.dot(array_n.T, array_p2.T), array_m))
        # Update the error; it must be evaluated with the Q of *this* case (array_q2), not with
        # the Q found in the first case, otherwise the convergence test looks at the wrong pair.
        error2 = compute_error(array_n, array_m, array_p2.T, array_q2)
        if error2 > tol:
            # Update P
            array_p2 = _2sided_hungarian(
                np.dot(np.dot(array_n, array_q2), array_m.T))
            array_p2 = np.transpose(array_p2)
            # Update the error
            error2 = compute_error(array_n, array_m, array_p2.T, array_q2)
            step2 += 1
        else:
            break

    if step2 == iteration:
        print(f"Maximum iteration reached in the second case! Error={error2}")

    # return the transformations corresponding to the lowest error
    if error1 <= error2:
        return array_p1, array_q1, error1
    return array_p2, array_q2, error2
def rotational(
    a: np.ndarray,
    b: np.ndarray,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
    lapack_driver: str = "gesvd",
) -> ProcrustesResult:
    r"""Perform rotational Procrustes.

    For a matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix
    :math:`\mathbf{B}_{m \times n}`, determine the rotation matrix :math:`\mathbf{R}_{n \times n}`
    (orthogonal with unit determinant) that brings :math:`\mathbf{A}\mathbf{R}` as close as
    possible to :math:`\mathbf{B}`, i.e.,

    .. math::
       \underbrace{\min}_{\left\{\mathbf{R} \left| {\mathbf{R}^{-1} = {\mathbf{R}}^\dagger
                                \atop \left| \mathbf{R} \right| = 1} \right. \right\}}
          \|\mathbf{A}\mathbf{R} - \mathbf{B}\|_{F}^2

    The processed :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices must have the same shape,
    which is guaranteed with the default ``pad`` argument for any given :math:`\mathbf{A}` and
    :math:`\mathbf{B}` matrices.

    When preparing :math:`\mathbf{A}` and :math:`\mathbf{B}`, the (optional) operations are
    applied in this order: **1)** unpad zero rows/columns, **2)** translate the matrices to the
    origin, **3)** weight entries of :math:`\mathbf{A}`, **4)** scale the matrices to have unit
    norm, **5)** pad matrices with zero rows/columns so they have the same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
        matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {'gesvd', 'gesdd'}, optional
        Whether to use the more efficient divide-and-conquer approach ('gesdd') or the more robust
        general rectangular approach ('gesvd') to compute the singular-value decomposition with
        `scipy.linalg.svd`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a class:`utils.ProcrustesResult` object.

    Raises
    ------
    ValueError
        If the processed :math:`\mathbf{A}` and :math:`\mathbf{B}` do not have the same shape.

    Notes
    -----
    The optimal rotational matrix is obtained by,

    .. math::
       \mathbf{R}_{\text{opt}} =
       \arg \underbrace{\min}_{\left\{\mathbf{R} \left| {\mathbf{R}^{-1} = {\mathbf{R}}^\dagger
                                   \atop \left| \mathbf{R} \right| = 1} \right. \right\}}
            \|\mathbf{A}\mathbf{R} - \mathbf{B}\|_{F}^2 =
       \arg \underbrace{\max}_{\left\{\mathbf{R} \left| {\mathbf{R}^{-1} = {\mathbf{R}}^\dagger
                                   \atop \left| \mathbf{R} \right| = 1} \right. \right\}}
            \text{Tr}\left[\mathbf{R}^\dagger {\mathbf{A}}^\dagger \mathbf{B} \right]

    The solution follows from the singular value decomposition (SVD) of the
    :math:`\mathbf{A}^\dagger \mathbf{B}` matrix,

    .. math::
       \mathbf{A}^\dagger \mathbf{B} &= \tilde{\mathbf{U}} \tilde{\mathbf{\Sigma}}
                                        \tilde{\mathbf{V}}^{\dagger} \\
       \mathbf{R}_{\text{opt}} &= \tilde{\mathbf{U}} \tilde{\mathbf{S}}
                                  \tilde{\mathbf{V}}^{\dagger}

    where :math:`\tilde{\mathbf{S}}_{n \times n}` is almost an identity matrix,

    .. math::
       \tilde{\mathbf{S}}_{n \times n} \equiv
       \begin{bmatrix}
           1      &  0      & \cdots &  0      & 0 \\
           0      &  1      & \ddots & \vdots  & 0 \\
           0      & \ddots  & \ddots &  0      & \vdots \\
           \vdots &  0      &  0     &  1      & 0 \\
           0      &  0      &  0 \cdots & 0    &
           \operatorname{sgn} \left(\left|\mathbf{U}\mathbf{V}^\dagger\right|\right)
       \end{bmatrix}

    in which the last diagonal entry (the position of the smallest singular value) is replaced by

    .. math::
       \operatorname{sgn} \left(\left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right|\right) =
       \begin{cases}
        +1 \qquad \left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right| \geq 0 \\
        -1 \qquad \left|\tilde{\mathbf{U}} \tilde{\mathbf{V}}^\dagger\right| < 0
       \end{cases}

    Examples
    --------
    >>> import numpy as np
    >>> array_a = np.array([[1.5, 7.4], [8.5, 4.5]])
    >>> array_b = np.array([[6.29325035,  4.17193001, 0., 0,],
    ...                     [9.19238816, -2.82842712, 0., 0.],
    ...                     [0.,          0.,         0., 0.]])
    >>> res = rotational(array_a,array_b,translate=False,scale=False)
    >>> res.t   # rotational transformation
    array([[ 0.70710678, -0.70710678],
           [ 0.70710678,  0.70710678]])
    >>> res.error   # one-sided Procrustes error
    1.483808210011695e-17

    """
    # pre-process the inputs (unpad / translate / weight / scale / pad)
    new_a, new_b = setup_input_arrays(
        a, b, unpad_col, unpad_row, pad, translate, scale, check_finite, weight
    )
    if new_a.shape != new_b.shape:
        raise ValueError(
            f"Shape of A and B does not match: {new_a.shape} != {new_b.shape} "
            "Check pad, unpad_col, and unpad_row arguments.")

    # singular value decomposition of the cross matrix A.T * B
    cross = np.dot(new_a.T, new_b)
    left, _, right_t = scipy.linalg.svd(cross, lapack_driver=lapack_driver)

    # diagonal of S: all ones, except the last entry which carries sgn(|U * V^t|) so that the
    # resulting orthogonal matrix has determinant +1
    s_diag = np.ones(new_a.shape[1])
    s_diag[-1] = np.sign(np.linalg.det(left.dot(right_t)))

    # optimal rotational transformation R = U * S * V^t
    r_opt = left.dot(np.diag(s_diag)).dot(right_t)

    # one-sided error for the optimal rotation
    error = compute_error(new_a, new_b, r_opt)
    return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=r_opt, s=None)
def symmetric(
    a,
    b,
    pad=True,
    translate=False,
    scale=False,
    unpad_col=False,
    unpad_row=False,
    check_finite=True,
    weight=None,
    lapack_driver="gesvd",
):
    r"""Perform symmetric Procrustes.

    Given a matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix
    :math:`\mathbf{B}_{m \times n}` with :math:`m \geqslant n`, find the symmetric transformation
    matrix :math:`\mathbf{X}_{n \times n}` that makes :math:`\mathbf{AX}` as close as possible to
    :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{X} \left| \mathbf{X} = \mathbf{X}^\dagger
                \right. \right\}} \|\mathbf{A} \mathbf{X} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape with :math:`m \geqslant n`, which is guaranteed with the default ``pad``
    argument for any given :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. If, after the
    preparation steps, either matrix has fewer rows than columns, both matrices are zero-padded
    to be square. In preparing the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices, the
    (optional) order of operations is: **1)** unpad zero rows/columns, **2)** translate the
    matrices to the origin, **3)** weight entries of :math:`\mathbf{A}`, **4)** scale the
    matrices to have unit norm, **5)** pad matrices with zero rows/columns so they have the
    same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side of the
        initial :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom of the initial
        :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by
        :math:`\mathbf{A}` matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {'gesvd', 'gesdd'}, optional
        Whether to use the more efficient divide-and-conquer approach ('gesdd') or the more
        robust general rectangular approach ('gesvd') to compute the singular-value
        decomposition with `scipy.linalg.svd`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a :class:`utils.ProcrustesResult` object.

    Raises
    ------
    ValueError
        If the prepared :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices do not have the same
        shape (e.g., when ``pad=False`` and the inputs differ in shape).

    Notes
    -----
    The optimal symmetric matrix is obtained by,

    .. math::
       \mathbf{X}_{\text{opt}} = \arg \underbrace{\text{min}}_{\left\{\mathbf{X} \left|
       \mathbf{X} = \mathbf{X}^\dagger \right. \right\}}
       \|\mathbf{A} \mathbf{X} - \mathbf{B}\|_{F}^2 =
       \arg \underbrace{\text{min}}_{\left\{\mathbf{X} \left| \mathbf{X} = \mathbf{X}^\dagger
       \right. \right\}} \text{Tr}\left[\left(\mathbf{A}\mathbf{X} - \mathbf{B} \right)^\dagger
       \left(\mathbf{A}\mathbf{X} - \mathbf{B} \right)\right]

    Considering the singular value decomposition of :math:`\mathbf{A}`,

    .. math::
       \mathbf{A}_{m \times n} = \mathbf{U}_{m \times m}
                                 \mathbf{\Sigma}_{m \times n}
                                 \mathbf{V}_{n \times n}^\dagger

    where :math:`\mathbf{\Sigma}_{m \times n}` is a rectangular diagonal matrix with non-negative
    singular values :math:`\sigma_i = [\mathbf{\Sigma}]_{ii}` listed in descending order, define

    .. math::
       \mathbf{C}_{m \times n} = \mathbf{U}_{m \times m}^\dagger
                                 \mathbf{B}_{m \times n} \mathbf{V}_{n \times n}

    with elements denoted by :math:`c_{ij}`.
    Then we compute the symmetric matrix :math:`\mathbf{Y}_{n \times n}` with

    .. math::
       [\mathbf{Y}]_{ij} = \begin{cases}
              0 && i \text{ and } j > \text{rank} \left(\mathbf{A}\right) \\
              \frac{\sigma_i c_{ij} + \sigma_j c_{ji}}{\sigma_i^2 +
              \sigma_j^2} && \text{otherwise} \end{cases}

    It is worth noting that the first part of this definition only applies in the unusual case
    where :math:`\mathbf{A}` has rank less than :math:`n`. The :math:`\mathbf{X}_\text{opt}` is
    given by

    .. math::
       \mathbf{X}_\text{opt} = \mathbf{V Y V}^{\dagger}

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([[5., 2., 8.],
    ...               [2., 2., 3.],
    ...               [1., 5., 6.],
    ...               [7., 3., 2.]])
    >>> b = np.array([[ 52284.5, 209138. , 470560.5],
    ...               [ 22788.5,  91154. , 205096.5],
    ...               [ 46139.5, 184558. , 415255.5],
    ...               [ 22788.5,  91154. , 205096.5]])
    >>> res = symmetric(a, b, pad=True, translate=True, scale=True)
    >>> res.t   # symmetric transformation array
    array([[0.0166352 , 0.06654081, 0.14971682],
           [0.06654081, 0.26616324, 0.59886729],
           [0.14971682, 0.59886729, 1.34745141]])
    >>> res.error   # error
    4.483083428047388e-31

    """
    # check inputs
    new_a, new_b = setup_input_arrays(
        a,
        b,
        unpad_col,
        unpad_row,
        pad,
        translate,
        scale,
        check_finite,
        weight,
    )

    # if number of rows is less than column, the arrays are made square
    if (new_a.shape[0] < new_a.shape[1]) or (new_b.shape[0] < new_b.shape[1]):
        new_a, new_b = _zero_padding(new_a, new_b, "square")

    # fail early with a clear message; otherwise mismatched shapes only surface as an opaque
    # "shapes not aligned" ValueError raised by np.dot when computing matrix C below
    if new_a.shape != new_b.shape:
        raise ValueError(
            f"Shape of A and B does not match: {new_a.shape} != {new_b.shape} "
            "Check pad, unpad_col, and unpad_row arguments."
        )

    # compute SVD of A & matrix C = U^T B V
    u, s, vt = scipy.linalg.svd(new_a, lapack_driver=lapack_driver)
    c = np.dot(np.dot(u.T, new_b), vt.T)

    # compute intermediate matrix Y (vectorized over all i, j pairs):
    #   numerator[i, j]   = s_i * c_ij + s_j * c_ji
    #   denominator[i, j] = s_i^2 + s_j^2
    n = new_a.shape[1]
    weighted_c = s[:, np.newaxis] * c[:n, :n]
    numerator = weighted_c + weighted_c.T
    s_squared = s**2
    denominator = s_squared[:, np.newaxis] + s_squared[np.newaxis, :]

    # entries whose two singular values are both zero (rank-deficient A) are left as zero
    y = np.zeros((n, n))
    nonzero = denominator != 0
    y[nonzero] = numerator[nonzero] / denominator[nonzero]

    # compute optimum symmetric transformation matrix X = V Y V^T
    x = np.dot(np.dot(vt.T, y), vt)
    error = compute_error(new_a, new_b, x)

    return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=x, s=None)