Example #1
    def test_torch_function_with_multiple_output_on_local_var(self):
        x = Var(torch.FloatTensor([[1, 2], [2, 3], [5, 6]]))
        t, s = torch.max(x, 1)
        assert (t == Var(torch.FloatTensor([2, 3, 6]))).all()
        assert (s == Var(torch.LongTensor([1, 1, 1]))).all()

        x = Var(torch.FloatTensor([[0, 0], [0, 0]]))
        y, z = torch.eig(x, True)
        assert (y == Var(torch.FloatTensor([[0, 0], [0, 0]]))).all()
        assert (z == Var(torch.FloatTensor([[1, 0.], [0, 1]]))).all()


        x = Var(torch.FloatTensor([[0, 0], [1, 0]]))
        y, z = torch.qr(x)
        assert (y == Var(torch.FloatTensor([[0, -1], [-1, 0]]))).all()
        assert (z == Var(torch.FloatTensor([[-1, 0], [0, 0]]))).all()

        x = Var(torch.arange(1, 6))
        y, z = torch.kthvalue(x, 4)
        assert (y == Var(torch.FloatTensor([4]))).all()
        assert (z == Var(torch.LongTensor([3]))).all()

        x = Var(torch.zeros(3, 3))
        w, y, z = torch.svd(x)
        assert (w == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
        assert (y == Var(torch.FloatTensor([0, 0, 0]))).all()
        assert (z == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
Example #2
    def test_local_tensor_multi_var_methods(self):
        x = torch.FloatTensor([[1, 2], [2, 3], [5, 6]])
        t, s = torch.max(x, 1)
        assert (t == torch.FloatTensor([2, 3, 6])).float().sum() == 3
        assert (s == torch.LongTensor([1, 1, 1])).float().sum() == 3

        x = torch.FloatTensor([[0, 0], [1, 1]])
        y, z = torch.eig(x, True)
        assert (y == torch.FloatTensor([[1, 0], [0, 0]])).all()
        assert (torch.equal(z == torch.FloatTensor([[0, 0], [1, 0]]), torch.ByteTensor([[1, 0], [1, 0]])))

        x = torch.FloatTensor([[0, 0], [1, 0]])
        y, z = torch.qr(x)
        assert (y == torch.FloatTensor([[0, -1], [-1, 0]])).all()
        assert (z == torch.FloatTensor([[-1, 0], [0, 0]])).all()

        x = torch.arange(1, 6)
        y, z = torch.kthvalue(x, 4)
        assert (y == torch.FloatTensor([4])).all()
        assert (z == torch.LongTensor([3])).all()

        x = torch.zeros(3, 3)
        w, y, z = torch.svd(x)
        assert (w == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
        assert (y == torch.FloatTensor([0, 0, 0])).all()
        assert (z == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
Example #3
    def test_remote_tensor_multi_var_methods(self):
        hook = TorchHook(verbose=False)
        local = hook.local_worker
        remote = VirtualWorker(hook, 1)
        local.add_worker(remote)

        x = torch.FloatTensor([[1, 2], [4, 3], [5, 6]])
        x.send(remote)
        y, z = torch.max(x, 1)
        assert torch.equal(y.get(), torch.FloatTensor([2, 4, 6]))
        assert torch.equal(z.get(), torch.LongTensor([1, 0, 1]))

        x = torch.FloatTensor([[0, 0], [1, 0]]).send(remote)
        y, z = torch.qr(x)
        assert (y.get() == torch.FloatTensor([[0, -1], [-1, 0]])).all()
        assert (z.get() == torch.FloatTensor([[-1, 0], [0, 0]])).all()

        x = torch.arange(1, 6).send(remote)
        y, z = torch.kthvalue(x, 4)
        assert (y.get() == torch.FloatTensor([4])).all()
        assert (z.get() == torch.LongTensor([3])).all()

        x = torch.FloatTensor([[0, 0], [1, 1]]).send(remote)
        y, z = torch.eig(x, True)
        assert (y.get() == torch.FloatTensor([[1, 0], [0, 0]])).all()
        assert ((z.get() == torch.FloatTensor([[0, 0], [1, 0]])) == torch.ByteTensor([[1, 0], [1, 0]])).all()

        x = torch.zeros(3, 3).send(remote)
        w, y, z = torch.svd(x)
        assert (w.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
        assert (y.get() == torch.FloatTensor([0, 0, 0])).all()
        assert (z.get() == torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]])).all()
Example #4
    def test_torch_function_with_multiple_output_on_remote_var(self):
        hook = TorchHook(verbose=False)
        me = hook.local_worker
        remote = VirtualWorker(id=2, hook=hook)
        me.add_worker(remote)

        x = Var(torch.FloatTensor([[1, 2], [4, 3], [5, 6]]))
        x.send(remote)
        y, z = torch.max(x, 1)
        y.get()
        assert torch.equal(y, Var(torch.FloatTensor([2, 4, 6])))

        x = Var(torch.FloatTensor([[0, 0], [1, 0]])).send(remote)
        y, z = torch.qr(x)
        assert (y.get() == Var(torch.FloatTensor([[0, -1], [-1, 0]]))).all()
        assert (z.get() == Var(torch.FloatTensor([[-1, 0], [0, 0]]))).all()

        x = Var(torch.arange(1, 6)).send(remote)
        y, z = torch.kthvalue(x, 4)
        assert (y.get() == Var(torch.FloatTensor([4]))).all()
        assert (z.get() == Var(torch.LongTensor([3]))).all()

        x = Var(torch.FloatTensor([[0, 0], [0, 0]]))
        x.send(remote)
        y, z = torch.eig(x, True)
        assert (y.get() == Var(torch.FloatTensor([[0, 0], [0, 0]]))).all()
        assert (z.get() == Var(torch.FloatTensor([[1, 0.], [0, 1]]))).all()


        x = Var(torch.zeros(3, 3)).send(remote)
        w, y, z = torch.svd(x)
        assert (w.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
        assert (y.get() == Var(torch.FloatTensor([0, 0, 0]))).all()
        assert (z.get() == Var(torch.FloatTensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))).all()
Example #5
def random_orthogonal(size):
    """
    Returns a random orthogonal matrix as a 2-dim tensor of shape [size, size].
    """

    # Use the QR decomposition of a random Gaussian matrix.
    x = torch.randn(size, size)
    q, _ = torch.qr(x)
    return q
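
A minimal usage sketch (assuming `torch` is imported as in the snippet): orthogonality can be checked by verifying that Q^T Q is close to the identity.

# Usage sketch: a 4 x 4 random orthogonal matrix satisfies Q^T Q ~= I.
Q = random_orthogonal(4)
assert torch.allclose(Q.t() @ Q, torch.eye(4), atol=1e-5)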
Example #6
def make_linear_dataset(n, d, seed=0):
    """
    Create a dataset for training a deep linear network with n datapoints of dimension d.
    """
    torch.manual_seed(seed)
    X = (torch.qr(torch.randn(n, d))[0] * sqrt(n)).cuda()
    A = torch.randn(d, d).cuda()
    Y = X.mm(A.t())
    return TensorDataset(X, Y), TensorDataset(X, Y)
Example #7
def qr(A: torch.Tensor
       ) -> Tuple[torch.Tensor, torch.Tensor]:  # pragma: no cover
    """
    Like torch.linalg.qr.
    """
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "qr"):
        # PyTorch version >= 1.9
        return torch.linalg.qr(A)
    return torch.qr(A)
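
A short usage sketch for this compatibility wrapper; on either code path it returns the reduced factorization, so Q @ R reconstructs the input.

# Usage sketch: reduced QR of a tall matrix via the version-agnostic wrapper.
A = torch.randn(5, 3)
Q, R = qr(A)
assert Q.shape == (5, 3) and R.shape == (3, 3)
assert torch.allclose(Q @ R, A, atol=1e-5)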
Example #8
 def __init__(self, in_channel, out_channel=None):
     super().__init__()
     
     if out_channel is None:
         out_channel = in_channel
     weight = torch.randn(in_channel, out_channel)
     q, _ = torch.qr(weight)
     weight = q.unsqueeze(2).unsqueeze(3)
     self.weight = nn.Parameter(weight)
Example #9
    def _init_cache_for_non_constant_diag(self, eye, batch_shape, n):
        # With non-constant diagonals, we can't factor out the noise as easily
        self._q_cache, self._r_cache = torch.qr(torch.cat((self._piv_chol_self / self._noise.sqrt(), eye), dim=-2))
        self._q_cache = self._q_cache[..., :n, :] / self._noise.sqrt()

        # Use the matrix determinant lemma for the logdet, using the fact that R'R = L_k'L_k + s*I
        logdet = self._r_cache.diagonal(dim1=-1, dim2=-2).abs().log().sum(-1).mul(2)
        logdet -= (1.0 / self._noise).log().sum([-1, -2])
        self._precond_logdet_cache = logdet.view(*batch_shape) if len(batch_shape) else logdet.squeeze()
Example #10
 def randomly_rotate(X):
     """Randomly rotate d,n data matrix X"""
     d, n = X.shape
     z = torch.randn((d, d), dtype=X.dtype)
     q, r = torch.qr(z)
     d = torch.diag(r)
     ph = d / abs(d)
     rot_mat = q * ph
     return rot_mat @ X
Example #11
        def orthogonal_square():
            """
            Create orthogonal square matrix using Gram-Schmidt

            Return:
              orthogonal random features 
            """
            q, _ = torch.qr(self.iid_gaussian(d, d))
            return q.T
Example #12
    def fit_A(self,
              data,
              sample='gaussian',
              weight=None,
              ndata_A=None,
              MSWD_p=2,
              MSWD_max_iter=200,
              pool=None,
              verbose=True):

        # Fit the directions A along which the 1D transforms are applied

        if verbose:
            tstart = start_timing(self.A.device)

        if ndata_A is None or ndata_A > len(data):
            ndata_A = len(data)
        if sample != 'gaussian':
            if ndata_A > len(sample):
                ndata_A = len(sample)
            if ndata_A == len(sample):
                sample = sample.to(self.A.device)
            else:
                sample = sample[torch.randperm(
                    len(sample),
                    device=sample.device)[:ndata_A]].to(self.A.device)
        if ndata_A == len(data):
            data = data.to(self.A.device)
            if weight is not None:
                weight = weight.to(self.A.device)
        else:
            order = torch.randperm(len(data), device=data.device)[:ndata_A]
            data = data[order].to(self.A.device)
            if weight is not None:
                weight = weight[order].to(self.A.device)
        if weight is not None:
            weight = weight / torch.sum(weight)
            select = weight > 0
            data = data[select]
            weight = weight[select]

        A, SWD = maxKSWDdirection(data,
                                  x2=sample,
                                  weight=weight,
                                  K=self.K,
                                  maxiter=MSWD_max_iter,
                                  p=MSWD_p)
        with torch.no_grad():
            SWD, indices = torch.sort(SWD, descending=True)
            A = A[:, indices]
            self.A[:] = torch.qr(A)[0]

        if verbose:
            t = end_timing(tstart, self.A.device)
            print('Fit A:', 'Time:', t, 'Wasserstein Distance:', SWD.tolist())
        return self
Example #13
File: utils.py Project: qpmnh/wsod
def orthogonal_init(layers, mean=0.0, std=0.01):
    k = len(layers)
    ou_f = layers[0].out_features
    in_f = layers[0].in_features
    random = torch.randn((ou_f, in_f, k)) * std + mean
    q, r = torch.qr(random, some=True)

    for detector, init in zip(layers, q.permute(2, 0, 1)):
        detector.weight.data.copy_(init)
        nn.init.zeros_(detector.bias)
Example #14
File: util.py Project: yinnxinn/pymde
def hutchpp(linear_operator, dimension, n_queries):
    A = linear_operator
    d = dimension
    m = n_queries
    S = torch.randn(d, m // 3, device=A.device)
    G = torch.randn(d, m // 3, device=A.device)
    Q, _ = torch.qr(A.matvec(S))
    proj = G - Q @ (Q.T @ G)
    return torch.trace(
        Q.T @ A.matvec(Q)) + (3.0 / m) * torch.trace(proj.T @ A.matvec(proj))
Example #15
def cca_by_qr(x: Tensor, y: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """ CCA using QR and SVD.
    For more details, check Press 2011 "Canonical Correlation Clarified by Singular Value Decomposition"

    Args:
        x: input tensor of shape DxH
        y: input tensor of shape DxW

    Returns: x-side coefficients, y-side coefficients, diagonal

    """

    q_1, r_1 = torch.qr(x)
    q_2, r_2 = torch.qr(y)
    qq = q_1.t() @ q_2
    u, diag, v = torch.svd(qq)
    a = torch.inverse(r_1) @ u
    b = torch.inverse(r_2) @ v
    return a, b, diag
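
A minimal usage sketch, assuming `x` and `y` are mean-centered data matrices with more rows than columns (so `r_1` and `r_2` are invertible); `diag` then holds the canonical correlations in descending order.

# Usage sketch: canonical correlations between two centered random datasets.
x = torch.randn(100, 8); x = x - x.mean(dim=0)
y = torch.randn(100, 5); y = y - y.mean(dim=0)
a, b, diag = cca_by_qr(x, y)
print(diag)  # min(H, W) = 5 values in [0, 1], sorted descending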
Example #16
def get_logdet(jacobian):
    """
    Use QR factorisation to compute log absolute determinant of the jacobian matrix.
    NOTE: prefer `torch.slogdet` from PyTorch >= 0.4
    :param jacobian: (M, M)
    :return: log-determinant jacobian
    """
    Q, R = torch.qr(jacobian)
    log_det = torch.log(torch.diag(R).abs()).sum()
    return log_det
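
As the note above suggests, newer PyTorch can compute the same quantity directly; a small sketch comparing the QR-based result with `torch.slogdet` on a random non-singular Jacobian:

# Sketch: the QR-based log|det| agrees with torch.slogdet's log-abs-determinant.
J = torch.randn(6, 6)
sign, logabsdet = torch.slogdet(J)
assert torch.allclose(get_logdet(J), logabsdet, atol=1e-4)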
Example #17
def qr_retraction(tan_vec):  # tan_vec, p-by-n, p <= n
    [p, n] = tan_vec.size()
    tan_vec.t_()
    q, r = torch.qr(tan_vec)
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph.expand_as(q)
    q.t_()

    return q
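
A short usage sketch; note that `qr_retraction` transposes its argument in place, so a clone is passed here (an assumption about how callers are expected to use it).

# Sketch: retract a 3 x 5 tangent representative to a matrix with orthonormal rows.
v = torch.randn(3, 5)
q = qr_retraction(v.clone())  # clone because tan_vec.t_() mutates the input
assert torch.allclose(q @ q.t(), torch.eye(3), atol=1e-5)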
Example #18
def QR_features(x, degree=6):
    # x:[time, h, w, 1]
    time, h, w, _ = x.shape
    poly_x = poly_features(x, degree + 1).permute(1, 2, 0, -1)  # [h, w, time, features]
    ans = t.zeros((h, w, time, degree))
    for i in range(h * w):
        row = i // w
        col = i % w
        ans[row, col] = t.qr(poly_x[row, col])[0][:, 1:]
    return ans.permute(2, 0, 1, -1)
Example #19
    def OrthogonalRandomFeature(self):
        n = self.n_features//self.depth
        remainder = self.n_features%self.depth
        orthogonal_features = []
        for _ in range(n):
            normal_feature = torch.rand(self.depth, self.depth)
            orthogonal_feature, _ = torch.qr(normal_feature)
            orthogonal_features.append(orthogonal_feature)
        
        if remainder > 0 :
            normal_feature = torch.rand(self.depth, self.depth)
            orthogonal_feature, _ = torch.qr(normal_feature)
            orthogonal_features.append(orthogonal_feature[0: remainder])
        
        orthogonal_features = torch.cat(orthogonal_features)
        multiplier = torch.randn(self.n_features, self.depth).norm(dim=1)
        final_features = torch.matmul(torch.diag(multiplier), orthogonal_features)

        return final_features
Example #20
    def _init_cache_for_constant_diag(self, eye, batch_shape, n, k):
        # We can factor out the noise for both QR and solves.
        self._noise = self._noise.narrow(-2, 0, 1)
        self._q_cache, self._r_cache = torch.qr(torch.cat((self._piv_chol_self, self._noise.sqrt() * eye), dim=-2))
        self._q_cache = self._q_cache[..., :n, :]

        # Use the matrix determinant lemma for the logdet, using the fact that R'R = L_k'L_k + s*I
        logdet = self._r_cache.diagonal(dim1=-1, dim2=-2).abs().log().sum(-1).mul(2)
        logdet = logdet + (n - k) * self._noise.squeeze(-2).squeeze(-1).log()
        self._precond_logdet_cache = logdet.view(*batch_shape) if len(batch_shape) else logdet.squeeze()
Example #21
def orthogonal_init(fc1, fc2):
    out_dim, in_dim = fc1.weight.data.shape
    device = fc1.weight.data.device
    for i in np.arange(out_dim):
        weight_random = torch.randn([in_dim, 2]).to(device)
        Q, R = torch.qr(weight_random)
        fc1.weight.data[i, :] = Q[:, 0]
        fc2.weight.data[i, :] = Q[:, 1]
    fc1.bias.data.zero_()
    fc2.bias.data.zero_()
Example #22
File: tensorial.py Project: kharyuk/vbtd
 def orthogonalize(self, last_core=True):
     for k in range(self.d):
         tmp = torch_utils.reshape_torch(self.cores[k].data, [self.r[k]*self.n[k], -1], use_batch=False)
         if k > 0:
             tmp = r.mm(tmp)
         if (k == self.d-1) and (not last_core):
             self.cores[k].data = torch_utils.reshape_torch(tmp, [self.r[k], self.n[k], -1], use_batch=False)
             continue
         q, r = torch.qr(tmp)
         self.cores[k].data = torch_utils.reshape_torch(q, [self.r[k], self.n[k], -1], use_batch=False)
Example #23
def oneStepVarQR(J, Q):
    Z = torch.matmul(
        torch.transpose(J.float(), 1, 2),
        Q)  # Linear extrapolation of the network in many directions
    q, r = torch.qr(Z, some=True)  # QR decomposition of new directions
    s = torch.diag_embed(torch.sign(torch.diagonal(
        r, dim1=1, dim2=2)))  # extract sign of each leading r value
    return torch.matmul(q, s), torch.diagonal(
        torch.matmul(s, r), dim1=1,
        dim2=2)  # return positive r values and corresponding vectors
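
A minimal usage sketch, assuming `J` is a batch of Jacobians and `Q` a batch of orthonormal direction matrices (as used, for example, when iterating Lyapunov-exponent estimates); it relies on the batched torch.qr that the snippet itself assumes.

# Sketch: one batched re-orthonormalization step on random Jacobians.
batch, n, k = 4, 10, 3
J = torch.randn(batch, n, n)
Q0 = torch.qr(torch.randn(batch, n, k), some=True)[0]
Q1, r_vals = oneStepVarQR(J, Q0)
print(Q1.shape, r_vals.shape)  # expected: (batch, n, k) and (batch, k)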
Example #24
def _orthogonalize_tt_cores_right_to_left(tt):
    """Orthogonalize TT-cores of a TT-object in the right to left order.
    Args:
        tt: TensorTrain or a TensorTrainBatch.
    Returns:
        The same type as the input `tt` (TensorTrain or a TensorTrainBatch).
    """
    # Right to left orthogonalization.
    ndims = tt.ndims
    raw_shape = tt.get_raw_shape()
    tt_ranks = tt.get_tt_ranks()
    prev_rank = tt_ranks[ndims]
    # Copy cores reference so we can change the cores.
    tt_cores = list(tt.tt_cores)
    for core_idx in range(ndims - 1, 0, -1):
        curr_core = tt_cores[core_idx]
        # TT-ranks could have changed on the previous iteration, so `tt_ranks` can
        # be outdated for the current TT-rank, but should be valid for the next
        # TT-rank.
        curr_rank = prev_rank
        prev_rank = tt_ranks[core_idx]
        if tt.is_tt_matrix():
            curr_mode_left = raw_shape[0][core_idx]
            curr_mode_right = raw_shape[1][core_idx]
            curr_mode = curr_mode_left * curr_mode_right
        else:
            curr_mode = raw_shape[0][core_idx]

        qr_shape = (prev_rank, curr_mode * curr_rank)
        curr_core = curr_core.reshape(qr_shape)
        curr_core, triang = torch.qr(curr_core.t())
        curr_core = curr_core.t()
        triang_shape = triang.shape

        # The TT-rank could have changed: if qr_shape is e.g. 4 x 10, then q would
        # be of size 4 x 4 and r would be 4 x 10, which means that the next rank
        # should be changed to 4.
        prev_rank = triang_shape[1]
        if tt.is_tt_matrix():
            new_core_shape = (prev_rank, curr_mode_left, curr_mode_right,
                              curr_rank)
        else:
            new_core_shape = (prev_rank, curr_mode, curr_rank)
        tt_cores[core_idx] = curr_core.reshape(new_core_shape)

        prev_core = tt_cores[core_idx - 1].reshape(-1, triang_shape[0])
        tt_cores[core_idx - 1] = torch.mm(prev_core, triang)

    if tt.is_tt_matrix():
        first_core_shape = (1, raw_shape[0][0], raw_shape[1][0], prev_rank)
    else:
        first_core_shape = (1, raw_shape[0][0], prev_rank)
    tt_cores[0] = tt_cores[0].reshape(first_core_shape)
    # TODO: infer the tt_ranks
    return TensorTrain(tt_cores, tt.get_raw_shape())
Example #25
def gpu_tsvd(
        A: torch.Tensor,
        k: int,
        n_iter: int = 2,
        n_oversamples: int = 8
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    GPU Truncated SVD. Based on fbpca's version.
    
    Parameters
    ----------
    A : (M, N) torch.Tensor
    k : int
    n_iter : int
    n_oversamples : int
    
    Returns
    -------
    u : (M, k) torch.Tensor
    s : (k,) torch.Tensor
    vt : (k, N) torch.Tensor
    """
    m, n = A.shape
    Q = torch.rand(n, k + n_oversamples)
    Q = A @ Q

    Q, _ = torch.qr(Q)

    # Power iterations
    for _ in range(n_iter):
        Q = (Q.t() @ A).t()
        Q, _ = torch.qr(Q)
        Q = A @ Q
        Q, _ = torch.qr(Q)

    QA = Q.t() @ A
    # Transpose QA to make it tall-skinny as MAGMA has optimisations for this
    # (USVt)t = VStUt
    Va, s, R = torch.svd(QA.t(), some=True)
    # torch.svd(QA.t()) gives QA.t() = Va diag(s) R^T, so QA = R diag(s) Va^T and
    # the left singular vectors of QA are the columns of R.
    U = Q @ R

    return U[:, :k], s[:k], Va.t()[:k, :]
Example #26
    def random_svd(self, A, k):
        if self._bsize == 0:
            bsize = k
        else:
            bsize = self._bsize
        u = A.new_zeros((1, A.shape[1]))
        l = A.new_zeros((A.shape[0], 1))

        if A.shape[0] < A.shape[1]:
            n = A.shape[0]
            ind = 0
        else:
            n = A.shape[1]
            ind = 1
        tpose = False

        if ind == 0:
            tpose = True
            l = torch.t(u)
            u = A.new_ones((1, A.shape[0]))
            A = torch.t(A)
        K = A.new_zeros((A.shape[1], bsize * self._q))
        block = torch.randn(A.shape[1], bsize).to(A.device)
        block, _ = torch.qr(block)
        T = A.new_zeros((A.shape[1], bsize))

        for i in range(self._q):
            T = torch.matmul(A, block) - torch.matmul(l, torch.matmul(
                u, block))
            block = torch.matmul(torch.t(A), T) - torch.matmul(
                torch.t(u), torch.matmul(torch.t(l), T))
            block, _ = torch.qr(block)
            K[:, int(i * bsize):int((i + 1) * bsize)] = block.clone().detach()
        Q, _ = torch.qr(K)
        T = torch.matmul(A, Q) - torch.matmul(l, torch.matmul(u, Q))
        Ut, St, Vt = torch.svd(T)
        S = St[0:k]
        if tpose:
            V = Ut[:, 0:k]
            U = torch.matmul(Q, Vt[:, 0:k])
        else:
            U = Ut[:, 0:k]
            V = torch.matmul(Q, Vt[:, 0:k])
        return U, S, V
Example #27
 def projected(self):
     a = self.active
     L = self.K[a][:, a].cholesky()
     sigma = self.sigma
     A = torch.cat([self.K[:, a] / sigma.view(-1, 1), L.t()])
     O = torch.zeros(L.size(0)).type(L.type())
     Y = torch.cat([self.y / sigma, O])
     Q, R = torch.qr(A)
     mu = R.inverse() @ Q.t() @ Y
     delta = ((self.K[:, a] @ mu).view(-1) - self.y).abs()
     return a, mu, delta.mean(), delta.max()
Example #28
    def variable_with_orth_weight_decay(self, shape):
        s1 = torch.tensor(shape[1], dtype=torch.int32).to('cuda')
        s2 = torch.tensor(shape[1] / 2, dtype=torch.int32).to('cuda')
        w0_init, _ = torch.qr(torch.normal(0, 1, size=(s1, s2)))
        w0 = torch.nn.Parameter(w0_init).to('cuda')

        tmp1 = w0.view(1, s1, s2)
        tmp2 = w0.transpose(0, 1).view(1, s2, s1)
        tmp1 = self.tile(tmp1, 0, shape[0])
        tmp2 = self.tile(tmp2, 0, shape[0])
        return tmp1, tmp2
Example #29
 def to_orthogonal_matrix(self, seed):
     """
     Return an orthogonal matrix given an arbitrary square matrix seed.
     Random matrices are uniformly distributed according to the Haar measure,
     as explained here: https://arxiv.org/pdf/math-ph/0609050.pdf
     """
     q, r = torch.qr(seed)
     d = r.diag()
     ph = d / d.abs()
     output = q @ ph.diag() @ q
     return output
Example #30
    def __init__(self,
                 dim,
                 scramble=False,
                 hetero=True,
                 hidden=False,
                 child=True,
                 ones=True,
                 noise_identity=True):
        self.hetero = hetero
        self.hidden = hidden
        self.dim = dim // 2
        print("ones" + str(ones))

        if ones:
            self.wxy = torch.eye(self.dim)
            if child:
                print("child " + str(child))
                self.wyz = torch.eye(self.dim)
            else:
                self.wyz = torch.zeros(self.dim, self.dim)
        else:
            self.wxy = torch.randn(self.dim, self.dim) / dim
            if child:
                self.wyz = torch.randn(self.dim, self.dim) / dim
            else:
                self.wyz = torch.zeros(self.dim, self.dim)
        if scramble:
            self.scramble, _ = torch.qr(torch.randn(dim, dim))
        else:
            self.scramble = torch.eye(dim)

        if hidden:
            if noise_identity == 0:
                print("noise_identity " + str(noise_identity))
                self.whx = torch.randn(self.dim, self.dim) / dim
                self.why = torch.randn(self.dim, self.dim) / dim
                self.whz = torch.randn(self.dim, self.dim) / dim
            else:
                if noise_identity == 1:
                    print("noise_identity " + str(noise_identity))
                    self.whx = torch.eye(self.dim, self.dim)
                    self.why = torch.eye(self.dim, self.dim)
                    self.whz = torch.eye(self.dim, self.dim)
                else:
                    if noise_identity == 2:
                        print("noise_identity " + str(noise_identity))
                        self.whx = torch.rand(self.dim, self.dim) / dim
                        self.why = torch.rand(self.dim, self.dim) / dim
                        self.whz = torch.rand(self.dim, self.dim) / dim

        else:
            self.whx = torch.eye(self.dim, self.dim)
            self.why = torch.zeros(self.dim, self.dim)
            self.whz = torch.zeros(self.dim, self.dim)
Example #31
    def __init__(self):
        super(Invertible_1x1_Conv, self).__init__()
        assert hp.Decoder.Num_Split % 2 == 0

        weight = torch.qr(
            torch.FloatTensor(hp.Decoder.Num_Split,
                              hp.Decoder.Num_Split).normal_())[0]
        if torch.det(weight) < 0:
            weight[:, 0] = -weight[:, 0]

        self.weight = torch.nn.Parameter(weight)
Example #32
    def __init__(self, channels, n_split=4, no_jacobian=False, **kwargs):
        super().__init__()
        assert(n_split % 2 == 0)
        self.channels = channels
        self.n_split = n_split
        self.no_jacobian = no_jacobian

        w_init = torch.qr(torch.FloatTensor(self.n_split, self.n_split).normal_())[0]
        if torch.det(w_init) < 0:
            w_init[:,0] = -1 * w_init[:,0]
        self.weight = nn.Parameter(w_init)
Example #33
    def forward(ctx, input):
        [bs, r, d] = input.size()

        Q = torch.zeros(bs, r, d, dtype=torch.float64).double().cuda()
        R = torch.zeros(bs, d, d, dtype=torch.float64).double().cuda()
        for id in range(bs):
            Q[id, :, :], R[id, :, :] = torch.qr(input[id, :, :].squeeze())

        ctx.save_for_backward(input, Q, R)

        return Q, R
Example #34
def orthogonal_matrix_chunk(cols, qr_uniform_q=False, device=None):
    unstructured_block = torch.randn((cols, cols), device=device)
    q, r = torch.qr(unstructured_block.cpu(), some=True)
    q, r = map(lambda t: t.to(device), (q, r))

    # proposed by @Parskatt
    # to make sure Q is uniform https://arxiv.org/pdf/math-ph/0609050.pdf
    if qr_uniform_q:
        d = torch.diag(r, 0)
        q *= d.sign()
    return q.t()
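
A short usage sketch; because the function returns `q.t()`, the rows of the result are orthonormal.

# Sketch: rows of the returned chunk form an orthonormal basis.
block = orthogonal_matrix_chunk(16, qr_uniform_q=True, device='cpu')
assert torch.allclose(block @ block.t(), torch.eye(16), atol=1e-5)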
Example #35
def from_eigen(eigen):
    '''Construct a random matrix with the given eigenvalues.

    To construct such a matrix from the eigenvalue decomposition
    (i.e. U * Sigma * U.t()), we need a unitary matrix U, where Sigma is
    the diagonal matrix of the eigenvalues `eigen`.
    The matrix U can be the unitary matrix Q from
    the QR-decomposition of a randomly generated matrix.

    Args:
        eigen: A vector of size (Batch, Size).

    Returns:
        A random matrix of size (Batch, Size, Size).
    '''
    size = eigen.size(-1)
    Q, _ = torch.qr(torch.randn(
        (size, size), dtype=eigen.dtype, device=eigen.device))
    return mul_diag(Q, eigen).matmul(Q.t())
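
A minimal verification sketch, assuming the helper `mul_diag` (defined elsewhere in the same project) scales the columns of Q by `eigen`, so the result is Q diag(eigen) Q^T and its eigenvalues should match the requested ones.

# Sketch: the constructed matrix should have (approximately) the requested eigenvalues.
eigen = torch.tensor([[1.0, 2.0, 3.0]])  # batch of one spectrum
M = from_eigen(eigen)
vals = torch.linalg.eigvalsh(M[0])       # assumes a PyTorch version with torch.linalg
print(vals)                              # ~ [1., 2., 3.] (ascending order)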
Example #36
File: init.py Project: athiwatp/pytorch
def orthogonal(tensor, gain=1):
    """Fills the input Tensor or Variable with a (semi) orthogonal matrix, as
    described in "Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks" - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional torch.Tensor or autograd.Variable, where n >= 2
        gain: optional scaling factor

    Examples:
        >>> w = torch.Tensor(3, 5)
        >>> nn.init.orthogonal(w)
    """
    if isinstance(tensor, Variable):
        orthogonal(tensor.data, gain=gain)
        return tensor

    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    rows = tensor.size(0)
    cols = tensor[0].numel()
    flattened = torch.Tensor(rows, cols).normal_(0, 1)
    # Compute the qr factorization
    q, r = torch.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph.expand_as(q)
    # Pad zeros to Q (if rows smaller than cols)
    if rows < cols:
        padding = torch.zeros(rows, cols - rows)
        if q.is_cuda:
            q = torch.cat([q, padding.cuda()], 1)
        else:
            q = torch.cat([q, padding], 1)

    tensor.view_as(q).copy_(q)
    tensor.mul_(gain)
    return tensor
Example #37
File: init.py Project: xiongyw/pytorch
def orthogonal_(tensor, gain=1):
    r"""Fills the input `Tensor` with a (semi) orthogonal matrix, as
    described in "Exact solutions to the nonlinear dynamics of learning in deep
    linear neural networks" - Saxe, A. et al. (2013). The input tensor must have
    at least 2 dimensions, and for tensors with more than 2 dimensions the
    trailing dimensions are flattened.

    Args:
        tensor: an n-dimensional `torch.Tensor`, where :math:`n \geq 2`
        gain: optional scaling factor

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.orthogonal_(w)
    """
    if tensor.ndimension() < 2:
        raise ValueError("Only tensors with 2 or more dimensions are supported")

    rows = tensor.size(0)
    cols = tensor[0].numel()
    flattened = tensor.new(rows, cols).normal_(0, 1)

    if rows < cols:
        flattened.t_()

    # Compute the qr factorization
    q, r = torch.qr(flattened)
    # Make Q uniform according to https://arxiv.org/pdf/math-ph/0609050.pdf
    d = torch.diag(r, 0)
    ph = d.sign()
    q *= ph

    if rows < cols:
        q.t_()

    with torch.no_grad():
        tensor.view_as(q).copy_(q)
        tensor.mul_(gain)
    return tensor
Example #38
File: svd.py Project: VChristiaens/VIP
def randomized_svd_gpu(M, n_components, n_oversamples=10, n_iter='auto',
                       transpose='auto', random_state=0, lib='cupy'):
    """Computes a truncated randomized SVD on GPU. Adapted from Sklearn.

    Parameters
    ----------
    M : ndarray or sparse matrix
        Matrix to decompose
    n_components : int
        Number of singular values and vectors to extract.
    n_oversamples : int (default is 10)
        Additional number of random vectors to sample the range of M so as
        to ensure proper conditioning. The total number of random vectors
        used to find the range of M is n_components + n_oversamples. A smaller
        number can improve speed but can negatively impact the quality of the
        approximation of singular vectors and singular values.
    n_iter : int or 'auto' (default is 'auto')
        Number of power iterations. It can be used to deal with very noisy
        problems. When 'auto', it is set to 4, unless `n_components` is small
        (< .1 * min(M.shape)), in which case `n_iter` is set to 7.
        This improves precision with few components.
    transpose : True, False or 'auto' (default)
        Whether the algorithm should be applied to M.T instead of M. The
        result should approximately be the same. The 'auto' mode will
        trigger the transposition if M.shape[1] > M.shape[0] since this
        implementation of randomized SVD tends to be a little faster in that
        case.
    random_state : int, RandomState instance or None, optional (default=0)
        The seed of the pseudo random number generator to use when shuffling
        the data.  If int, random_state is the seed used by the random number
        generator; If RandomState instance, random_state is the random number
        generator; If None, the random number generator is the RandomState
        instance used by `np.random`.
    lib : {'cupy', 'pytorch'}, str optional
        Chooses the GPU library to be used.

    Notes
    -----
    This algorithm finds a (usually very good) approximate truncated
    singular value decomposition using randomization to speed up the
    computations. It is particularly fast on large matrices on which
    you wish to extract only a small number of components. In order to
    obtain further speed up, `n_iter` can be set <=2 (at the cost of
    loss of precision).

    References
    ----------
    * Finding structure with randomness: Stochastic algorithms for constructing
      approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis
      A. Szlam et al. 2014
    """
    random_state = check_random_state(random_state)
    n_random = n_components + n_oversamples
    n_samples, n_features = M.shape

    if n_iter == 'auto':
        # Checks if the number of iterations is explicitly specified
        n_iter = 7 if n_components < .1 * min(M.shape) else 4

    if transpose == 'auto':
        transpose = n_samples < n_features
    if transpose:
        M = M.T # this implementation is a bit faster with smaller shape[1]

    if lib == 'cupy':
        M = cupy.array(M)
        M = cupy.asarray(M)

        # Generating normal random vectors with shape: (M.shape[1], n_random)
        Q = random_state.normal(size=(M.shape[1], n_random))
        Q = cupy.array(Q)
        Q = cupy.asarray(Q)

        # Perform power iterations with Q to further 'imprint' the top
        # singular vectors of M in Q
        for i in range(n_iter):
            Q = cupy.dot(M, Q)
            Q = cupy.dot(M.T, Q)

        # Sample the range of M by a linear projection of Q and extract an orthonormal basis
        Q, _ = cupy.linalg.qr(cupy.dot(M, Q), mode='reduced')

        # project M to the (k + p) dimensional space using the basis vectors
        B = cupy.dot(Q.T, M)

        B = cupy.array(B)
        Q = cupy.array(Q)
        # compute the SVD on the thin matrix: (k + p) wide
        Uhat, s, V = cupy.linalg.svd(B, full_matrices=False, compute_uv=True)
        del B
        U = cupy.dot(Q, Uhat)

        if transpose:
            # transpose back the results according to the input convention
            return V[:n_components, :].T, s[:n_components], U[:,
                                                            :n_components].T
        else:
            return U[:, :n_components], s[:n_components], V[:n_components, :]

    elif lib == 'pytorch':
        M_gpu = torch.Tensor.cuda(torch.from_numpy(M.astype('float32')))

        # Generating normal random vectors with shape: (M.shape[1], n_random)
        Q = torch.cuda.FloatTensor(M_gpu.shape[1], n_random).normal_()

        # Perform power iterations with Q to further 'imprint' the top
        # singular vectors of M in Q
        for i in range(n_iter):
            Q = torch.mm(M_gpu, Q)
            Q = torch.mm(torch.transpose(M_gpu, 0, 1), Q)

        # Sample the range of M by a linear projection of Q and extract an orthonormal basis
        Q, _ = torch.qr(torch.mm(M_gpu, Q))

        # project M to the (k + p) dimensional space using the basis vectors
        B = torch.mm(torch.transpose(Q, 0, 1), M_gpu)

        # compute the SVD on the thin matrix: (k + p) wide
        Uhat, s, V = torch.svd(B)
        # torch.svd returns V (not V^T as cupy.linalg.svd does); transpose so the
        # return statements below match the cupy branch's conventions.
        V = torch.transpose(V, 0, 1)
        del B
        U = torch.mm(Q, Uhat)

        if transpose:
            # transpose back the results according to the input convention
            return (torch.transpose(V[:n_components, :], 0, 1),
                    s[:n_components],
                    torch.transpose(U[:, :n_components], 0, 1))
        else:
            return U[:, :n_components], s[:n_components], V[:n_components, :]
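
A minimal usage sketch for the `lib='pytorch'` path, assuming a CUDA device is available and `M` is a NumPy array:

# Sketch: truncated randomized SVD of a random matrix on the GPU (requires CUDA).
import numpy as np
M = np.random.randn(500, 80)
U, s, Vt = randomized_svd_gpu(M, n_components=10, n_iter=2, lib='pytorch')
print(U.shape, s.shape, Vt.shape)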