Example #1
0
                    cudaGetErrorString(sts),
                    n_blocks.x,
                    n_blocks.y,
                    n_threads.x,
                    n_threads.y,
                    n_threads.z,
                    n_shared);
                %(fail)s;
            }

        } // END NESTED SCOPE
        """ % locals()


def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
    """Build a GpuImages2Neibs op for *mode* and apply it to the inputs."""
    op = GpuImages2Neibs(mode)
    return op(ten4, neib_shape, neib_step)


@local_optimizer([Images2Neibs])
def use_gpu_images2neibs(node):
    """Rewrite a float32 Images2Neibs node to run on the GPU.

    Returns the replacement outputs, or None (implicitly) when the node
    does not qualify.
    """
    if type(node.op) is not Images2Neibs:
        return
    if node.inputs[0].dtype != 'float32':
        return
    # Only these modes have a GPU implementation.
    if node.op.mode not in ['valid', 'ignore_borders', 'wrap_centered']:
        return
    gpu_input = gpu_from_host(node.inputs[0])
    gpu_out = gpu_images2neibs(gpu_input, node.inputs[1], node.inputs[2],
                               mode=node.op.mode)
    return [host_from_gpu(gpu_out)]

# Register the GPU rewrite only when CUDA support was detected at import time.
if cuda_available:
    register_gpu_opt()(use_gpu_images2neibs)
Example #2
0
        return code


@local_optimizer([CumsumOp])
def use_gpu_cumsum(node):
    """Move a float32 CumsumOp to the GPU when its input already lives there.

    Only fires when the input is the result of a HostFromGpu transfer,
    i.e. the data is already on the device.
    """
    inp = node.inputs[0]
    applicable = (type(node.op) is CumsumOp
                  and inp.dtype == 'float32'
                  and inp.owner
                  and isinstance(inp.owner.op, HostFromGpu))
    if not applicable:
        return

    axis = node.op.axis
    # GpuCumsum only supports a limited rank when a specific axis is given.
    if axis is not None and inp.ndim > GpuCumsum.SUPPORTED_NDIMS:
        return None

    device_x = gpu_from_host(inp)
    if axis is None:
        # ``gpu_cumsum`` assumes the array was flattened beforehand.
        if device_x.ndim > 1:
            device_x = GpuFlatten()(device_x)
        axis = 0

    return [host_from_gpu(GpuCumsum(axis)(device_x))]

# Register the GPU rewrite only when CUDA support was detected at import time.
if cuda_available:
    register_gpu_opt()(use_gpu_cumsum)
                    n_threads.x,
                    n_threads.y,
                    n_threads.z,
                    n_shared);
                %(fail)s;
            }

        } // END NESTED SCOPE
        """ % locals()


def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
    """Apply a GpuImages2Neibs op configured with *mode* to the inputs."""
    neibs_op = GpuImages2Neibs(mode)
    return neibs_op(ten4, neib_shape, neib_step)


# BUG FIX: @local_optimizer() was called without the list of tracked ops.
# Theano's local_optimizer requires the tracks argument, and every other
# optimizer in this file passes one ([Images2Neibs] / [CumsumOp]).
@local_optimizer([Images2Neibs])
def use_gpu_images2neibs(node):
    """Rewrite a float32 Images2Neibs node to its GPU counterpart.

    Returns the replacement outputs, or None (implicitly) when the node
    does not qualify (wrong op, dtype, or unsupported mode).
    """
    if (type(node.op) is Images2Neibs and node.inputs[0].dtype == 'float32'
            and node.op.mode in ['valid', 'ignore_borders', 'wrap_centered']):
        return [
            host_from_gpu(
                gpu_images2neibs(gpu_from_host(node.inputs[0]),
                                 node.inputs[1],
                                 node.inputs[2],
                                 mode=node.op.mode))
        ]


# Register the GPU rewrite only when CUDA support was detected at import time.
if cuda_available:
    register_gpu_opt()(use_gpu_images2neibs)
Example #4
0
        return code


@local_optimizer([CumsumOp])
def use_gpu_cumsum(node):
    """Lift a float32 CumsumOp onto the GPU if its input came from the GPU."""
    x = node.inputs[0]
    if type(node.op) is not CumsumOp:
        return
    if x.dtype != 'float32':
        return
    # Require the input to already live on the device (it is the output of
    # a HostFromGpu transfer), otherwise the move is not worthwhile.
    if not (x.owner and isinstance(x.owner.op, HostFromGpu)):
        return

    axis = node.op.axis
    if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
        # Too many dimensions for the axis-wise GPU kernel.
        return None

    gpu_x = gpu_from_host(x)
    if axis is None:
        # ``gpu_cumsum`` assumes the array was flattened when axis is None.
        gpu_x = GpuFlatten()(gpu_x) if gpu_x.ndim > 1 else gpu_x
        axis = 0

    return [host_from_gpu(GpuCumsum(axis)(gpu_x))]

# Register the GPU rewrite only when CUDA support was detected at import time.
if cuda_available:
    register_gpu_opt()(use_gpu_cumsum)