Example #1
def reduction(alpha,
              A,
              desc_A,
              mode_A,
              beta,
              C,
              desc_C,
              mode_C,
              reduce_op=cutensor.OP_ADD,
              compute_dtype=None):
    """Tensor reduction

    This routine computes the tensor reduction:

        C = alpha * reduce_op(uop_A(A)) + beta * uop_C(C)

    See :func:`cupy.cuda.cutensor.reduction` for details.

    Args:
        alpha (scalar or 0-dim numpy.ndarray): Scaling factor for A.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_A) of
            tensor A.
        mode_A (cutensor.Mode): A mode object created by `create_mode`.
        beta (scalar or 0-dim numpy.ndarray): Scaling factor for C.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_C) of
            tensor C.
        mode_C (cutensor.Mode): A mode object created by `create_mode`.
        reduce_op (cutensorOperator_t): Binary operator used to reduce A.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.

    Returns:
        out (cupy.ndarray): Output tensor.

    Examples:
        See examples/cutensor/reduction.py
    """
    if A.dtype != C.dtype:
        raise ValueError('dtype mismatch: {} != {}'.format(A.dtype, C.dtype))
    if not (A.flags.c_contiguous and C.flags.c_contiguous):
        raise ValueError('The inputs should be contiguous arrays.')

    mode_A = _auto_create_mode(A, mode_A)
    mode_C = _auto_create_mode(C, mode_C)

    out = C
    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    alpha = numpy.asarray(alpha, compute_dtype)
    beta = numpy.asarray(beta, compute_dtype)
    handle = get_handle()
    cutensor_dtype = get_cutensor_dtype(compute_dtype)
    ws_size = cutensor.reductionGetWorkspace(handle, A.data.ptr, desc_A,
                                             mode_A.data, C.data.ptr, desc_C,
                                             mode_C.data, out.data.ptr, desc_C,
                                             mode_C.data, reduce_op,
                                             cutensor_dtype)
    try:
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
    except cupy.cuda.memory.OutOfMemoryError:
        warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                      '(size: {}).'.format(ws_size))
        ws_size = 0
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
    cutensor.reduction(handle, alpha.ctypes.data, A.data.ptr, desc_A,
                       mode_A.data, beta.ctypes.data, C.data.ptr, desc_C,
                       mode_C.data, out.data.ptr, desc_C, mode_C.data,
                       reduce_op, cutensor_dtype, ws.data.ptr, ws_size)
    return out
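
A minimal usage sketch for Example #1, not taken from the source: it assumes the routine lives in a module such as CuPy's cupyx.cutensor that also exposes create_tensor_descriptor() and create_mode() helpers (the excerpt itself relies on module-internal helpers like _auto_create_mode, get_handle and _set_compute_dtype, so it is not runnable standalone). The sketch reduces A_{x,y,z} over y and z into C_{x}:

import cupy
from cupyx import cutensor as ct  # assumed module path; adjust to wherever reduction() is defined

A = cupy.random.random((16, 32, 64)).astype(cupy.float32)
C = cupy.zeros((16,), dtype=cupy.float32)

# Descriptors carry the dtype/stride/unary-operator metadata for each tensor.
desc_A = ct.create_tensor_descriptor(A)
desc_C = ct.create_tensor_descriptor(C)

# This variant accepts Mode objects created by create_mode().
mode_A = ct.create_mode('x', 'y', 'z')
mode_C = ct.create_mode('x')

# C[x] = 1.0 * sum_{y,z} A[x, y, z] + 0.0 * C[x]
out = ct.reduction(1.0, A, desc_A, mode_A, 0.0, C, desc_C, mode_C)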
Example #2
def reduction(alpha,
              A,
              desc_A,
              mode_A,
              beta,
              C,
              desc_C,
              mode_C,
              reduce_op=cutensor.OP_ADD,
              compute_dtype=None):
    """Tensor reduction

    This routine computes the tensor reduction:

        C = alpha * reduce_op(uop_A(A)) + beta * uop_C(C)

    See :func:`cupy.cuda.cutensor.reduction` for details.

    Args:
        alpha (scalar or 0-dim numpy.ndarray): Scaling factor for A.
        A (cupy.ndarray): Input tensor.
        desc_A (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_A) of
            tensor A.
        mode_A (tuple of int/str): A tuple that holds the labels of the modes
            of tensor A (e.g., if A_{x,y,z}, mode_A = ('x', 'y', 'z')).
        beta (scalar or 0-dim numpy.ndarray): Scaling factor for C.
        C (cupy.ndarray): Input/output tensor.
        desc_C (class Descriptor): A descriptor that holds the information
            about the data type, modes, strides and unary operator (uop_C) of
            tensor C.
        mode_C (tuple of int/str): A tuple that holds the labels of the modes
            of tensor C.
        reduce_op (cutensorOperator_t): Binary operator used to reduce A.
        compute_dtype (numpy.dtype): Compute type for the intermediate
            computation.

    Returns:
        out (cupy.ndarray): Output tensor.

    Examples:
        See examples/cutensor/reduction.py
    """
    assert A.dtype == C.dtype
    assert A.ndim == len(mode_A)
    assert C.ndim == len(mode_C)
    mode_A = _convert_mode(mode_A)
    mode_C = _convert_mode(mode_C)
    out = C
    compute_dtype = _set_compute_dtype(A.dtype, compute_dtype)
    alpha = numpy.array(alpha, compute_dtype)
    beta = numpy.array(beta, compute_dtype)
    handle = get_handle()
    cutensor_dtype = get_cutensor_dtype(compute_dtype)
    ws_size = cutensor.reductionGetWorkspace(handle, A.data.ptr, desc_A,
                                             mode_A.ctypes.data, C.data.ptr,
                                             desc_C, mode_C.ctypes.data,
                                             out.data.ptr, desc_C,
                                             mode_C.ctypes.data, reduce_op,
                                             cutensor_dtype)
    try:
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
    except cupy.cuda.memory.OutOfMemoryError:
        warnings.warn('cuTENSOR: failed to allocate memory of workspace '
                      '(size: {}).'.format(ws_size))
        ws_size = 0
        ws = cupy.ndarray((ws_size, ), dtype=numpy.int8)
    cutensor.reduction(handle, alpha.ctypes.data, A.data.ptr, desc_A,
                       mode_A.ctypes.data, beta.ctypes.data, C.data.ptr,
                       desc_C, mode_C.ctypes.data, out.data.ptr, desc_C,
                       mode_C.ctypes.data, reduce_op, cutensor_dtype,
                       ws.data.ptr, ws_size)
    return out
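
Example #2 is an older variant of the same routine: mode labels are passed as plain tuples of str/int rather than Mode objects, and the input checks use assertions instead of raising ValueError. A minimal usage sketch under the same assumptions as above (the module path and the create_tensor_descriptor() helper are assumptions, not taken from the source):

import cupy
from cupyx import cutensor as ct  # assumed module path

A = cupy.random.random((8, 24, 24)).astype(cupy.float32)
C = cupy.zeros((8,), dtype=cupy.float32)

desc_A = ct.create_tensor_descriptor(A)
desc_C = ct.create_tensor_descriptor(C)

# This variant takes the mode labels directly as tuples; their lengths must
# match A.ndim and C.ndim, and A.dtype must equal C.dtype.
out = ct.reduction(1.0, A, desc_A, ('x', 'y', 'z'), 0.0, C, desc_C, ('x',))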