def _get_crossentropyloss_gpu(probs, t):
    """Launch the cross-entropy loss kernel and return its output buffer.

    Args:
        probs: 2-D array whose shape ``(N, M)`` is forwarded to the kernel.
            Presumably per-class probabilities resident on the GPU --
            TODO confirm against the caller.
        t: Target array, passed to the kernel unchanged (its layout is not
            visible from this function).

    Returns:
        A ``float32`` CuPy array of shape ``(1,)`` that was handed to the
        kernel as its output argument.
    """
    kernel = _crossentropyloss_kernel()
    N, M = probs.shape
    # All N blocks are given this same one-element buffer, so it is
    # zero-filled instead of being left uninitialized (previously cp.empty).
    # NOTE(review): assumes the kernel accumulates into loss[0] -- confirm
    # against the kernel source. Zero-filling is harmless if it overwrites.
    loss = cp.zeros((1,), dtype=np.float32)
    kernel(
        grid=(N, 1, 1),  # N blocks, where N == probs.shape[0]
        block=(32, 1, 1),  # 32 threads per block
        args=(probs, t, loss, np.int32(N), np.int32(M)),
    )
    return loss
def _get_crossentropyloss_gpu(probs, t):
    """Run the cross-entropy loss kernel on ``probs``/``t``.

    The kernel is launched with ``probs.shape[0]`` blocks of 32 threads and
    receives ``probs``, ``t``, a one-element ``float32`` output buffer, and
    the two dimensions of ``probs`` as 32-bit integers.

    Args:
        probs: 2-D array; only its shape is inspected here. Presumably a
            GPU array of probabilities -- TODO confirm.
        t: Target array forwarded to the kernel as-is.

    Returns:
        The one-element ``float32`` CuPy array passed to the kernel as its
        output argument.
    """
    kernel = _crossentropyloss_kernel()
    N, M = probs.shape
    # The single-element result is shared by every block of the launch.
    # It used to come from cp.empty, i.e. with arbitrary initial contents;
    # start it at zero so the result cannot include leftover memory.
    # NOTE(review): this matters if the kernel adds into loss[0] (likely,
    # given N blocks and one output slot) -- verify in the kernel source.
    loss = cp.zeros((1, ), dtype=np.float32)
    kernel(grid=(N, 1, 1),
           block=(32, 1, 1),
           args=(probs, t, loss, np.int32(N), np.int32(M)))
    return loss
def _gsoftmaxCrossentropy_gpu(y, t, coef):
    """Launch the ``_gsoftmaxCrossentropy`` kernel over ``y`` and return ``y``.

    Launch dimensions are picked from the shape ``(N, M)`` of ``y``:
    a row-vector helper when ``N == 1``, a "small N, big M" helper when
    ``M >= 4 * N``, and the generic 2-D helper otherwise.

    Args:
        y: 2-D array passed to the kernel as its first argument.
            Presumably modified in place by the kernel -- TODO confirm.
        t: Target array forwarded to the kernel unchanged.
        coef: Coefficient forwarded to the kernel unchanged.

    Returns:
        The same ``y`` object that was passed in.
    """
    launch = _gsoftmaxCrossentropy_kernel()
    n_rows, n_cols = y.shape

    # Each helper returns a (block dims, grid dims) pair.
    if n_rows == 1:
        dims = gpu.utils.Get_bdim_and_gdimRowVec(n_cols)
    elif n_cols >= (n_rows * 4):
        dims = gpu.utils.Get_bdim_and_gdimSmallNBigM(n_rows, n_cols)
    else:
        dims = gpu.utils.Get_bdim_and_gdim2D(n_rows, n_cols)
    block_dim, grid_dim = dims

    kernel_args = (y, t, coef, np.int32(n_rows), np.int32(n_cols))
    launch(grid=grid_dim, block=block_dim, args=kernel_args)
    return y
def _gsoftmaxCrossentropy_gpu(y, t, coef):
    """Invoke the ``_gsoftmaxCrossentropy`` kernel on ``y`` and hand ``y`` back.

    The block/grid configuration depends on the shape ``(N, M)`` of ``y``:

    * ``N == 1``      -> ``Get_bdim_and_gdimRowVec(M)``
    * ``M >= N * 4``  -> ``Get_bdim_and_gdimSmallNBigM(N, M)``
    * otherwise       -> ``Get_bdim_and_gdim2D(N, M)``

    Args:
        y: 2-D array given to the kernel first; presumably updated in place
            by the kernel (verify against the kernel source).
        t: Target array, passed through to the kernel.
        coef: Coefficient, passed through to the kernel.

    Returns:
        ``y`` itself (the object passed in, not a copy made here).
    """
    kernel_fn = _gsoftmaxCrossentropy_kernel()
    rows, cols = y.shape

    if rows != 1:
        if cols >= (rows * 4):
            # Far more columns than rows.
            threads, blocks = gpu.utils.Get_bdim_and_gdimSmallNBigM(rows, cols)
        else:
            threads, blocks = gpu.utils.Get_bdim_and_gdim2D(rows, cols)
    else:
        # Single row: dimensions depend on the column count only.
        threads, blocks = gpu.utils.Get_bdim_and_gdimRowVec(cols)

    kernel_fn(
        grid=blocks,
        block=threads,
        args=(y, t, coef, np.int32(rows), np.int32(cols)),
    )
    return y