def _qr_batched(a, mode):
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    m, n = a.shape[-2:]
    k = min(m, n)

    # first handle any 0-size inputs
    if batch_size == 0 or k == 0:
        # support float32, float64, complex64, and complex128
        dtype, out_dtype = _util.linalg_common_type(a)
        if mode == 'reduced':
            return (cupy.empty(batch_shape + (m, k), out_dtype),
                    cupy.empty(batch_shape + (k, n), out_dtype))
        elif mode == 'complete':
            q = _util.stacked_identity(batch_shape, m, out_dtype)
            return (q, cupy.empty(batch_shape + (m, n), out_dtype))
        elif mode == 'r':
            return cupy.empty(batch_shape + (k, n), out_dtype)
        elif mode == 'raw':
            return (cupy.empty(batch_shape + (n, m), out_dtype),
                    cupy.empty(batch_shape + (k,), out_dtype))

    # ...then delegate real computation to cuSOLVER/rocSOLVER
    a = a.reshape(-1, *(a.shape[-2:]))
    out = _geqrf_orgqr_batched(a, mode)

    if mode == 'r':
        return out.reshape(batch_shape + out.shape[-2:])
    q, r = out
    q = q.reshape(batch_shape + q.shape[-2:])
    idx = -1 if mode == 'raw' else -2
    r = r.reshape(batch_shape + r.shape[idx:])
    return (q, r)
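

# Usage sketch (assumption: illustrative only, not part of the original
# module).  _qr_batched is an internal helper; the public entry point,
# cupy.linalg.qr, dispatches to it when the input has more than two
# dimensions (batched QR, available in CuPy v9+).  A minimal shape and
# reconstruction check for 'reduced' mode:
import cupy
import cupy.testing

a = cupy.random.rand(4, 5, 3)             # batch of four 5x3 matrices
q, r = cupy.linalg.qr(a, mode='reduced')
assert q.shape == (4, 5, 3)               # (..., m, k) with k = min(m, n)
assert r.shape == (4, 3, 3)               # (..., k, n)
cupy.testing.assert_allclose(q @ r, a, atol=1e-6)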
def check_usv(self, shape, dtype):
    array = testing.shaped_random(
        shape, numpy, dtype=dtype, seed=self.seed)
    a_cpu = numpy.asarray(array, dtype=dtype)
    a_gpu = cupy.asarray(array, dtype=dtype)
    result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices)
    result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices)
    # Check if the input matrix is not broken
    cupy.testing.assert_allclose(a_gpu, a_cpu)

    assert len(result_gpu) == 3
    for i in range(3):
        assert result_gpu[i].shape == result_cpu[i].shape
        assert result_gpu[i].dtype == result_cpu[i].dtype
    u_cpu, s_cpu, vh_cpu = result_cpu
    u_gpu, s_gpu, vh_gpu = result_gpu
    cupy.testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4)

    # reconstruct the matrix
    k = s_cpu.shape[-1]
    if len(shape) == 2:
        if self.full_matrices:
            a_gpu_usv = cupy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :])
        else:
            a_gpu_usv = cupy.dot(u_gpu * s_gpu, vh_gpu)
    else:
        if self.full_matrices:
            a_gpu_usv = cupy.matmul(
                u_gpu[..., :k] * s_gpu[..., None, :], vh_gpu[..., :k, :])
        else:
            a_gpu_usv = cupy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu)
    cupy.testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4)

    # assert unitary
    u_len = u_gpu.shape[-1]
    vh_len = vh_gpu.shape[-2]
    cupy.testing.assert_allclose(
        cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu),
        _util.stacked_identity(shape[:-2], u_len, dtype),
        atol=1e-4)
    cupy.testing.assert_allclose(
        cupy.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()),
        _util.stacked_identity(shape[:-2], vh_len, dtype),
        atol=1e-4)
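

# Standalone sketch of the reconstruction check above (assumption:
# illustrative only, not part of the test suite).  With
# full_matrices=False the factors multiply back directly; the columns of
# u are scaled by s via broadcasting, exactly as in check_usv:
import cupy
import cupy.testing
import numpy

a_cpu = numpy.random.rand(6, 4)
a_gpu = cupy.asarray(a_cpu)
u, s, vh = cupy.linalg.svd(a_gpu, full_matrices=False)
cupy.testing.assert_allclose(cupy.dot(u * s, vh), a_gpu,
                             rtol=1e-4, atol=1e-4)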
def _svd_batched(a, full_matrices, compute_uv):
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    n, m = a.shape[-2:]

    dtype, uv_dtype = _util.linalg_common_type(a)
    s_dtype = uv_dtype.char.lower()

    # first handle any 0-size inputs
    if batch_size == 0:
        k = min(m, n)
        s = cupy.empty(batch_shape + (k,), s_dtype)
        if compute_uv:
            if full_matrices:
                u = cupy.empty(batch_shape + (n, n), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (m, m), dtype=uv_dtype)
            else:
                u = cupy.empty(batch_shape + (n, k), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (k, m), dtype=uv_dtype)
            return u, s, vt
        else:
            return s
    elif m == 0 or n == 0:
        s = cupy.empty(batch_shape + (0,), s_dtype)
        if compute_uv:
            if full_matrices:
                u = _util.stacked_identity(batch_shape, n, uv_dtype)
                vt = _util.stacked_identity(batch_shape, m, uv_dtype)
            else:
                u = cupy.empty(batch_shape + (n, 0), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (0, m), dtype=uv_dtype)
            return u, s, vt
        else:
            return s

    # ...then delegate real computation to cuSOLVER
    a = a.reshape(-1, *(a.shape[-2:]))
    if runtime.is_hip or (m <= 32 and n <= 32):
        # copy is done in _gesvdj_batched, so let's try not to do it here
        a = a.astype(dtype, order='C', copy=False)
        out = _gesvdj_batched(a, full_matrices, compute_uv, False)
    else:
        # manually loop over cusolverDn<t>gesvd()
        # copy (via possible type casting) is done in _gesvd_batched
        # note: _gesvd_batched returns V, not V^H
        out = _gesvd_batched(a, dtype.char, full_matrices, compute_uv, False)

    if compute_uv:
        u, s, v = out
        u = u.astype(uv_dtype, copy=False)
        u = u.reshape(*batch_shape, *(u.shape[-2:]))
        s = s.astype(s_dtype, copy=False)
        s = s.reshape(*batch_shape, *(s.shape[-1:]))
        v = v.astype(uv_dtype, copy=False)
        v = v.reshape(*batch_shape, *(v.shape[-2:]))
        return u, s, v.swapaxes(-2, -1).conj()
    else:
        s = out
        s = s.astype(s_dtype, copy=False)
        s = s.reshape(*batch_shape, *(s.shape[-1:]))
        return s
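

# Usage sketch (assumption: illustrative only, not part of the original
# module).  The public cupy.linalg.svd dispatches to _svd_batched for
# inputs with more than two dimensions; with compute_uv=False only the
# singular values are returned:
import cupy

a = cupy.random.rand(3, 5, 4)                  # batch of three 5x4 matrices
u, s, vh = cupy.linalg.svd(a, full_matrices=False)
assert u.shape == (3, 5, 4)
assert s.shape == (3, 4)
assert vh.shape == (3, 4, 4)
s_only = cupy.linalg.svd(a, compute_uv=False)  # singular values only
assert s_only.shape == (3, 4)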