Пример #1
0
    def perform(self, node, inputs, outputs):
        """Compute the Cholesky factor of a square GPU matrix with cuSOLVER.

        Parameters
        ----------
        node
            The node being evaluated (not used by this implementation).
        inputs
            Sequence whose first element is the GPU matrix ``A`` to
            factorize.
        outputs
            Output storage; ``outputs[0][0]`` receives the triangular
            factor.

        Raises
        ------
        ValueError
            If ``A`` is not square, or if cuSOLVER reports that one of the
            arguments it received is invalid.
        LinAlgError
            If cuSOLVER reports that the factorization could not be
            completed (a positive ``devInfo``).
        """
        # Input matrix.
        A = inputs[0]

        # Read the context from the matrix itself. Going through the first
        # row (``A[0]``) would fail on an empty 0 x 0 matrix.
        context = A.context

        l, n = A.shape
        if l != n:
            raise ValueError('A must be a square matrix')

        # The leading dimension must be at least 1, even for an empty matrix.
        # NOTE(review): lda = n assumes L is stored contiguously - confirm
        # that callers never pass a strided view when ``inplace`` is set.
        lda = max(1, n)

        # cusolver operates on F ordered matrices, but A is expected
        # to be symmetric so it does not matter.
        # We copy A if needed
        if self.inplace:
            L = A
        else:
            L = pygpu.array(A, copy=True)

        # The output matrix will contain only the upper or lower
        # triangular factorization of A. If L is C ordered (it
        # probably is as it is the default in Theano) we just switch
        # the fill mode parameter of cusolver
        l_parameter = 0 if self.lower else 1
        if L.flags['C_CONTIGUOUS']:
            l_parameter = 1 - l_parameter

        L_ptr = L.gpudata

        with context:
            workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                context.cusolver_handle, l_parameter, n, L_ptr, lda)

            # NOTE(review): the ``S`` routines and the float32 workspace
            # imply single precision; the dtype of A is not checked here.
            workspace = pygpu.zeros(workspace_size,
                                    dtype='float32',
                                    context=context)

            dev_info = pygpu.zeros((1, ), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            dev_info_ptr = dev_info.gpudata

            cusolver.cusolverDnSpotrf(context.cusolver_handle, l_parameter, n,
                                      L_ptr, lda, workspace_ptr,
                                      workspace_size, dev_info_ptr)

            # Copy the status word back to the host to inspect it.
            val_dev_info = np.asarray(dev_info)[0]
            if val_dev_info > 0:
                raise LinAlgError('Cholesky decomposition failed (is A SPD?)')
            if val_dev_info < 0:
                # A negative status means an argument was rejected, so the
                # content of L cannot be trusted.
                raise ValueError(
                    'cusolverDnSpotrf reported an invalid argument '
                    '(devInfo = %d)' % val_dev_info)

        # cusolver leaves the elements in the matrix outside the considered
        # upper or lower triangle unchanged, so we need to put zeros outside
        # the triangle
        if self.lower:
            tril(L)
        else:
            triu(L)

        outputs[0][0] = L
Пример #2
0
    def perform(self, node, inputs, outputs):
        """Run a single-precision cuSOLVER Cholesky factorization on ``inputs[0]``.

        The factor is written to ``outputs[0][0]``. When ``self.inplace`` is
        set the input matrix itself is overwritten; otherwise a copy is
        factorized. ``self.lower`` selects which triangle is kept; the other
        triangle is zeroed before returning.

        Parameters
        ----------
        node
            The node being evaluated (not used by this implementation).
        inputs
            Sequence whose first element is the GPU matrix to factorize.
        outputs
            Output storage; ``outputs[0][0]`` is assigned the result.

        Raises
        ------
        ValueError
            If the matrix is not square, or if cuSOLVER flags one of its
            arguments as invalid (negative ``devInfo``).
        LinAlgError
            If cuSOLVER returns a positive ``devInfo``, i.e. the
            factorization failed.
        """
        # Input matrix.
        A = inputs[0]

        # The matrix carries its own context; fetching it through
        # ``A[0]`` would raise on a matrix with zero rows.
        context = A.context

        l, n = A.shape
        if l != n:
            raise ValueError('A must be a square matrix')

        # Never smaller than 1, so an empty matrix still gets a valid value.
        # NOTE(review): using n as the leading dimension presumes a
        # contiguous buffer - verify for the ``inplace`` path.
        lda = max(1, n)

        # cusolver operates on F ordered matrices, but A is expected
        # to be symmetric so it does not matter.
        # We copy A if needed
        if self.inplace:
            L = A
        else:
            L = pygpu.array(A, copy=True)

        # The output matrix will contain only the upper or lower
        # triangular factorization of A. If L is C ordered (it
        # probably is as it is the default in Theano) we just switch
        # the fill mode parameter of cusolver
        l_parameter = 0 if self.lower else 1
        if L.flags['C_CONTIGUOUS']:
            l_parameter = 1 - l_parameter

        L_ptr = L.gpudata

        with context:
            workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
                context.cusolver_handle, l_parameter, n, L_ptr, lda)

            # NOTE(review): float32 only; nothing here validates A's dtype.
            workspace = pygpu.zeros(workspace_size, dtype='float32',
                                    context=context)

            dev_info = pygpu.zeros((1,), dtype='int32', context=context)

            workspace_ptr = workspace.gpudata
            dev_info_ptr = dev_info.gpudata

            cusolver.cusolverDnSpotrf(
                context.cusolver_handle, l_parameter, n, L_ptr, lda,
                workspace_ptr, workspace_size, dev_info_ptr)

            # Bring the status word to the host before testing it.
            val_dev_info = np.asarray(dev_info)[0]
            if val_dev_info > 0:
                raise LinAlgError('Cholesky decomposition failed (is A SPD?)')
            if val_dev_info < 0:
                # An argument was rejected: do not hand back an
                # unfactorized matrix as if it were a valid result.
                raise ValueError(
                    'cusolverDnSpotrf reported an invalid argument '
                    '(devInfo = %d)' % val_dev_info)

        # cusolver leaves the elements in the matrix outside the considered
        # upper or lower triangle unchanged, so we need to put zeros outside
        # the triangle
        if self.lower:
            tril(L)
        else:
            triu(L)

        outputs[0][0] = L
Пример #3
0
def run_tril(dtype, shape, order, inplace):
    """Check ``tril`` against ``numpy.tril`` for one configuration.

    ``gen_gpuarray`` yields a pair of arrays: the first is used as the
    NumPy reference and the second is handed to ``tril``.

    Parameters
    ----------
    dtype, shape, order
        Forwarded to ``gen_gpuarray`` to build the test arrays.
    inplace
        Forwarded to ``tril``; also selects what the input array is
        expected to look like after the call.
    """
    host, device = gen_gpuarray(shape, dtype, order=order, ctx=context)
    lowered = tril(device, inplace=inplace)
    assert numpy.all(numpy.tril(host) == lowered)

    # An in-place call must have rewritten the input too; otherwise the
    # input has to be left exactly as generated.
    reference = numpy.tril(host) if inplace else host
    assert numpy.all(reference == device)
Пример #4
0
def test_tril():
    """Compare ``tril`` with ``numpy.tril`` over shapes, orders and modes.

    Covers tall, wide and square float32 inputs in both 'c' and 'f' order,
    with and without ``inplace``.
    """
    cases = [(shape, order, inplace)
             for shape in [(10, 5), (5, 10), (10, 10)]
             for order in ['c', 'f']
             for inplace in [True, False]]

    for shape, order, inplace in cases:
        host, device = gen_gpuarray(shape, 'float32',
                                    order=order, ctx=context)
        lowered = tril(device, inplace=inplace)
        assert numpy.all(numpy.tril(host) == lowered)

        # The input is either overwritten (in place) or left untouched.
        reference = numpy.tril(host) if inplace else host
        assert numpy.all(reference == device)
Пример #5
0
def test_tril():
    """Exercise ``tril`` on float32 arrays and compare with ``numpy.tril``.

    Every combination of three shapes (tall, wide, square), two memory
    orders and both ``inplace`` settings is checked.
    """
    shapes = ((10, 5), (5, 10), (10, 10))
    for shape in shapes:
        for order in ('c', 'f'):
            for inplace in (True, False):
                cpu_arr, gpu_arr = gen_gpuarray(
                    shape, 'float32', order=order, ctx=context)
                out = tril(gpu_arr, inplace=inplace)
                assert numpy.all(numpy.tril(cpu_arr) == out)
                if not inplace:
                    # Out-of-place: the input must be unchanged.
                    assert numpy.all(cpu_arr == gpu_arr)
                else:
                    # In-place: the input itself holds the result.
                    assert numpy.all(numpy.tril(cpu_arr) == gpu_arr)
Пример #6
0
 def run_noncontiguous_tril(self):
     """Call ``tril`` on a GPU array that is neither C- nor F-contiguous.

     NOTE(review): presumably ``tril`` is expected to reject this input;
     whatever asserts that (decorator or caller) is not visible here.
     """
     host = numpy.random.rand(5, 5)
     # Reverse the rows after uploading, so the GPU array is a strided view.
     strided = pygpu.array(host, context=context)[::-1]
     c_flag = strided.flags.c_contiguous
     f_flag = strided.flags.f_contiguous
     assert c_flag is f_flag and f_flag is False
     tril(strided)
Пример #7
0
 def run_3d_tril(self):
     """Call ``tril`` on a 3-dimensional (10, 10, 10) float32 GPU array.

     NOTE(review): presumably expected to fail for non-2D input; the
     check for that is not visible in this block.
     """
     cube_shape = (10, 10, 10)
     _host, device = gen_gpuarray(cube_shape, 'float32', ctx=context)
     tril(device)
Пример #8
0
 def run_noncontiguous_tril(self):
     """Upload a row-reversed host array and call ``tril`` on it.

     The GPU array is asserted to be neither C- nor F-contiguous before
     ``tril`` is invoked.

     NOTE(review): presumably ``tril`` is expected to reject this input;
     whatever asserts that (decorator or caller) is not visible here.
     """
     # Reverse the rows on the host, then upload the reversed view.
     reversed_host = numpy.random.rand(5, 5)[::-1]
     device = pygpu.array(reversed_host, context=context)
     c_flag = device.flags.c_contiguous
     f_flag = device.flags.f_contiguous
     assert c_flag is f_flag and f_flag is False
     tril(device)
Пример #9
0
 def run_3d_tril(self):
     """Feed a rank-3 float32 array of shape (10, 10, 10) to ``tril``.

     NOTE(review): likely a negative test for non-matrix input; confirm
     against the decorator or caller, which is outside this block.
     """
     pair = gen_gpuarray((10, 10, 10), 'float32', ctx=context)
     _cpu_side, gpu_side = pair
     tril(gpu_side)