Example #1
File: cg.py Project: takagi/cupy
 def call(self, env, group):
     if _runtime.runtimeGetVersion() < 11000:
         raise RuntimeError("not supported in CUDA < 11.0")
     if not isinstance(group.ctype, _ThreadGroup):
         raise ValueError("group must be a valid cooperative group")
     _check_include(env, 'cg')
     return _Data(f'cg::sync({group.code})', _cuda_types.void)
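For orientation, here is a minimal usage sketch of the barrier this wrapper emits. It assumes these helpers are exposed to kernels as cupyx.jit.cg alongside jit.rawkernel and jit.shared_memory; the kernel name, arrays, and tile size are illustrative, and CUDA 11.0+ is required per the version check above.

import cupy
from cupyx import jit

@jit.rawkernel()
def block_reverse(x, y):
    # Reverse each 256-element tile of x within its own thread block.
    smem = jit.shared_memory(cupy.float32, 256)
    block = jit.cg.this_thread_block()
    tid = jit.threadIdx.x
    base = jit.blockIdx.x * jit.blockDim.x
    smem[tid] = x[base + tid]
    # Block-wide barrier: the call() above compiles this to cg::sync(<block>).
    jit.cg.sync(block)
    y[base + tid] = smem[jit.blockDim.x - 1 - tid]

x = cupy.arange(1024, dtype=cupy.float32)
y = cupy.empty_like(x)
block_reverse((4,), (256,), (x, y))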
Example #2
File: cg.py Project: takagi/cupy
 def call(self, env, group, step):
     if _runtime.runtimeGetVersion() < 11000:
         raise RuntimeError("not supported in CUDA < 11.0")
     _check_include(env, 'cg')
     if not isinstance(step, _Constant):
         raise ValueError('step must be a compile-time constant')
     return _Data(f'cg::wait_prior<{step.obj}>({group.code})',
                  _cuda_types.void)
Example #3
File: cg.py Project: takagi/cupy
    def thread_index(self, env):
        """
        thread_index()

        3-Dimensional index of the thread within the launched block.
        """
        _check_include(env, 'cg')
        return _Data('thread_index()', _cuda_types.dim3)
Example #4
File: cg.py Project: takagi/cupy
    def group_index(self, env):
        """
        group_index()

        3-Dimensional index of the block within the launched grid.
        """
        _check_include(env, 'cg')
        return _Data('group_index()', _cuda_types.dim3)
Example #5
File: cg.py Project: takagi/cupy
    def is_valid(self, env):
        """
        is_valid()

        Returns whether the grid_group can synchronize.
        """
        _check_include(env, 'cg')
        return _Data('is_valid()', _cuda_types.bool_)
Example #6
File: cg.py Project: takagi/cupy
    def thread_rank(self, env):
        """
        thread_rank()

        Rank of the calling thread within ``[0, num_threads)``.
        """
        _check_include(env, 'cg')
        return _Data('thread_rank()', _cuda_types.uint64)
Example #7
File: cg.py Project: takagi/cupy
    def group_dim(self, env):
        """
        group_dim()

        Dimensions of the launched block in units of threads.
        """
        # although this is an alias of dim_threads, we need it for earlier 11.x
        _check_include(env, 'cg')
        return _Data('group_dim()', _cuda_types.dim3)
Example #8
File: cg.py Project: takagi/cupy
    def size(self, env):
        """
        size()

        Total number of threads in the group.
        """
        # although this is an alias of num_threads, we need it for earlier 11.x
        _check_include(env, 'cg')
        return _Data('size()', _cuda_types.uint32)
Example #9
    def group_index(self, env):
        """
        group_index()

        3-Dimensional index of the block within the launched grid.
        """
        from cupyx.jit._interface import _Dim3  # avoid circular import
        self._check_cg_include(env)
        return _Data('group_index()', _Dim3())
Example #10
    def thread_index(self, env):
        """
        thread_index()

        3-Dimensional index of the thread within the launched block.
        """
        from cupyx.jit._interface import _Dim3  # avoid circular import
        self._check_cg_include(env)
        return _Data('thread_index()', _Dim3())
Example #11
File: cg.py Project: takagi/cupy
    def dim_threads(self, env):
        """
        dim_threads()

        Dimensions of the launched block in units of threads.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("dim_threads() is supported on CUDA 11.6+")
        _check_include(env, 'cg')
        return _Data('dim_threads()', _cuda_types.dim3)
Example #12
File: cg.py Project: takagi/cupy
    def block_index(self, env):
        """
        block_index()

        3-Dimensional index of the block within the launched grid.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("block_index() is supported on CUDA 11.6+")
        _check_include(env, 'cg')
        return _Data('block_index()', _cuda_types.dim3)
Example #13
    def num_blocks(self, env):
        """
        num_blocks()

        Total number of blocks in the group.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("num_blocks() is supported on CUDA 11.6+")
        self._check_cg_include(env)
        return _Data('num_blocks()', _cuda_types.uint64)
Example #14
    def group_dim(self, env):
        """
        group_dim()

        Dimensions of the launched block in units of threads.
        """
        # although this is an alias of dim_threads, we need it for earlier 11.x
        from cupyx.jit._interface import _Dim3  # avoid circular import
        self._check_cg_include(env)
        return _Data('group_dim()', _Dim3())
Example #15
File: cg.py Project: takagi/cupy
    def num_threads(self, env):
        """
        num_threads()

        Total number of threads in the group.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("num_threads() is supported on CUDA 11.6+")
        _check_include(env, 'cg')
        return _Data('num_threads()', _cuda_types.uint32)
Example #16
File: cg.py Project: takagi/cupy
    def block_rank(self, env):
        """
        block_rank()

        Rank of the calling block within ``[0, num_blocks)``.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("block_rank() is supported on CUDA 11.6+")
        _check_include(env, 'cg')
        return _Data('block_rank()', _cuda_types.uint64)
Example #17
    def dim_threads(self, env):
        """
        dim_threads()

        Dimensions of the launched block in units of threads.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("dim_threads() is supported on CUDA 11.6+")
        from cupyx.jit._interface import _Dim3  # avoid circular import
        self._check_cg_include(env)
        return _Data('dim_threads()', _Dim3())
Example #18
    def block_index(self, env):
        """
        block_index()

        3-Dimensional index of the block within the launched grid.
        """
        if _runtime.runtimeGetVersion() < 11060:
            raise RuntimeError("block_index() is supported on CUDA 11.6+")
        from cupyx.jit._interface import _Dim3  # avoid circular import
        self._check_cg_include(env)
        return _Data('block_index()', _Dim3())
Example #19
File: cg.py Project: takagi/cupy
 def call_const(self, env):
     if _runtime.is_hip:
         raise RuntimeError('cooperative group is not supported on HIP')
     if self.group_type == 'grid':
         if _runtime.runtimeGetVersion() < 11000:
             raise RuntimeError(
                 "For pre-CUDA 11, the grid group has very limited "
                 "functionality (only group.sync() works), and so we "
                 "disable the grid group support to prepare the transition "
                 "to support CUDA 11+ only.")
         cg_type = _GridGroup()
     elif self.group_type == 'thread_block':
         cg_type = _ThreadBlockGroup()
     return _Data(f'cg::this_{self.group_type}()', cg_type)
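A rough sketch of how call_const is reached from user code, under the same assumption that the groups are exposed as cupyx.jit.cg; names are illustrative. A real grid-wide g.sync() additionally requires a cooperative kernel launch on supported hardware, which these snippets do not cover.

from cupyx import jit

@jit.rawkernel()
def grid_stride_copy(x, y, size):
    # Compiled by call_const() into cg::this_grid() / cg::this_thread_block().
    g = jit.cg.this_grid()           # CUDA 11.0+ only, per the check above
    b = jit.cg.this_thread_block()
    tid = jit.blockIdx.x * jit.blockDim.x + jit.threadIdx.x
    ntid = jit.gridDim.x * jit.blockDim.x
    for i in range(tid, size, ntid):
        y[i] = x[i]
    b.sync()   # block-wide barrier (see the sync() wrappers in later examples)
    g.sync()   # grid-wide barrier; needs a cooperative launch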
Example #20
File: cg.py Project: takagi/cupy
    def call(self,
             env,
             group,
             dst,
             dst_idx,
             src,
             src_idx,
             size,
             *,
             aligned_size=None):
        if _runtime.runtimeGetVersion() < 11010:
            # the overloaded version of memcpy_async that we use does not yet
            # exist in CUDA 11.0
            raise RuntimeError("not supported in CUDA < 11.1")
        _check_include(env, 'cg')
        _check_include(env, 'cg_memcpy_async')

        dst = _Data.init(dst, env)
        src = _Data.init(src, env)
        for arr in (dst, src):
            if not isinstance(arr.ctype,
                              (_cuda_types.CArray, _cuda_types.Ptr)):
                raise TypeError('dst/src must be of array type.')
        dst = _compile._indexing(dst, dst_idx, env)
        src = _compile._indexing(src, src_idx, env)

        size = _compile._astype_scalar(
            # it's very unlikely that the size would exceed 2^32, so we just
            # pick uint32 for simplicity
            size,
            _cuda_types.uint32,
            'same_kind',
            env)
        size = _Data.init(size, env)
        size_code = f'{size.code}'

        if aligned_size:
            if not isinstance(aligned_size, _Constant):
                raise ValueError(
                    'aligned_size must be a compile-time constant')
            _check_include(env, 'cuda_barrier')
            size_code = (f'cuda::aligned_size_t<{aligned_size.obj}>'
                         f'({size_code})')
        return _Data(
            f'cg::memcpy_async({group.code}, &({dst.code}), '
            f'&({src.code}), {size_code})', _cuda_types.void)
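A sketch of driving this wrapper from a kernel, again assuming the cupyx.jit.cg namespace; the tile size and names are illustrative. Per the code above, size is cast to uint32 and forwarded straight to cg::memcpy_async, so it is a byte count, and dst/src are indexed with dst_idx/src_idx before their addresses are taken. CUDA 11.1+ is required.

import cupy
from cupyx import jit

@jit.rawkernel()
def staged_copy(x, y, size):
    # Asynchronously stage one 256-float tile (1024 bytes) of x into shared
    # memory, wait for the copy to land, then write it back out to y.
    # For simplicity, assumes len(x) is a multiple of 256.
    smem = jit.shared_memory(cupy.float32, 256)
    block = jit.cg.this_thread_block()
    tid = jit.threadIdx.x
    base = jit.blockIdx.x * jit.blockDim.x
    # Copies 1024 bytes from &x[base] to &smem[0], as assembled above.
    jit.cg.memcpy_async(block, smem, 0, x, base, 4 * 256)
    jit.cg.wait(block)   # cg::wait(block): block until the copy completes
    if base + tid < size:
        y[base + tid] = smem[tid]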
Example #21
 def sync(self, env):
     self._check_cg_include(env)
     return _Data('sync()', _cuda_types.void)
Example #22
File: cg.py Project: takagi/cupy
 def sync(self, env):
     _check_include(env, 'cg')
     return _Data('sync()', _cuda_types.void)
Example #23
File: cg.py Project: takagi/cupy
 def call(self, env, group):
     if _runtime.runtimeGetVersion() < 11000:
         raise RuntimeError("not supported in CUDA < 11.0")
     _check_include(env, 'cg')
     return _Data(f'cg::wait({group.code})', _cuda_types.void)