def full(
    shape: Union[int, Tuple[int, ...]],
    fill_value: Union[int, float],
    *,
    dtype: Optional[Dtype] = None,
    device: Optional[Device] = None,
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.full <numpy.full>`.

    The array is created while ``device`` is the current device; the device
    that was current on entry is restored afterwards, even on failure.
    A zero-dimensional ``Array`` passed as ``fill_value`` is unwrapped to
    its underlying array before filling.

    Raises ``ValueError`` when ``device`` is neither ``None`` nor a
    ``_Device`` instance, and ``TypeError`` when the dtype of the created
    array is not one of ``_all_dtypes``.

    See the wrapped function's docstring for more information.
    """
    # Imported inside the function to avoid a circular import.
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        device = _Device()  # current device
    elif not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")

    if isinstance(fill_value, Array) and fill_value.ndim == 0:
        fill_value = fill_value._array

    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(device.id)
        filled = np.full(shape, fill_value, dtype=dtype)
    finally:
        runtime.setDevice(saved_device)

    if filled.dtype not in _all_dtypes:
        # Reached when the fill value is not something that gets coerced
        # to one of the acceptable dtypes.
        raise TypeError("Invalid input to full")
    return Array._new(filled)
def zeros(
    shape: Union[int, Tuple[int, ...]],
    *,
    dtype: Optional[Dtype] = None,
    device: Optional[Device] = None,
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.zeros <numpy.zeros>`.

    The array is created and wrapped while ``device`` is the current
    device; the device that was current on entry is restored afterwards,
    even on failure.

    Raises ``ValueError`` when ``device`` is neither ``None`` nor a
    ``_Device`` instance.

    See the wrapped function's docstring for more information.
    """
    # Imported inside the function to avoid a circular import.
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        device = _Device()  # current device
    elif not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")

    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(device.id)
        wrapped = Array._new(np.zeros(shape, dtype=dtype))
    finally:
        runtime.setDevice(saved_device)
    return wrapped
def copyto(dst, src, casting='same_kind', where=None):
    """Copies values from one array to another with broadcasting.

    This function can be called for arrays on different devices. In this
    case, casting, ``where``, and broadcasting is not supported, and an
    exception is raised if these are used.

    Besides arrays, ``src`` may be a Python scalar, a NumPy scalar, or a
    :class:`numpy.ndarray` holding exactly one element; such a value is
    written to ``dst`` as a scalar.

    Args:
        dst (cupy.ndarray): Target array.
        src (cupy.ndarray): Source array.
        casting (str): Casting rule. See :func:`numpy.can_cast` for detail.
        where (cupy.ndarray of bool): If specified, this array acts as a
            mask, and an element is copied only if the corresponding
            element of ``where`` is True.

    Raises:
        ValueError: If ``src`` is a host-side NumPy value with more than
            one element, or if its shape cannot be broadcast to ``dst``.
        TypeError: If ``src`` cannot be cast to ``dst.dtype`` under
            ``casting``.

    .. seealso:: :func:`numpy.copyto`

    """
    python_scalar_types = (
        int, bool, float, complex,
        fusion._FusionVarScalar, _fusion_interface._ScalarProxy)
    src_is_python_scalar = type(src) in python_scalar_types
    src_is_numpy_scalar = False

    if src_is_python_scalar:
        src_dtype = numpy.dtype(type(src))
        castable = numpy.can_cast(src, dst.dtype, casting)
    elif isinstance(src, numpy.ndarray) or numpy.isscalar(src):
        # Host-side values are accepted only when they hold one element.
        if src.size != 1:
            raise ValueError(
                'non-scalar numpy.ndarray cannot be used for copyto')
        src_dtype = src.dtype
        castable = numpy.can_cast(src, dst.dtype, casting)
        src = src.item()
        src_is_numpy_scalar = True
    else:
        src_dtype = src.dtype
        castable = numpy.can_cast(src_dtype, dst.dtype, casting)

    if not castable:
        raise TypeError('Cannot cast %s to %s in %s casting mode' %
                        (src_dtype, dst.dtype, casting))

    src_is_scalar = src_is_python_scalar or src_is_numpy_scalar

    if fusion._is_fusing():
        # TODO(kataoka): NumPy allows stripping leading unit dimensions.
        # But fusion array proxy does not currently support
        # `shape` and `squeeze`.
        if where is not None:
            fusion._call_ufunc(search._where_ufunc, where, src, dst, dst)
        else:
            _core.elementwise_copy(src, dst)
        return

    if not src_is_scalar:
        # Check the broadcast condition up front
        # - for the fast paths below and
        # - for a better error message (than ufunc's).
        # Shapes are compared from the trailing axis; missing axes count
        # as length 1 because NumPy allows stripping leading unit
        # dimensions.
        aligned_dims = itertools.zip_longest(
            reversed(src.shape), reversed(dst.shape), fillvalue=1)
        for src_len, dst_len in aligned_dims:
            if src_len not in (dst_len, 1):
                raise ValueError(
                    "could not broadcast input array "
                    f"from shape {src.shape} into shape {dst.shape}")
        extra_ndim = src.ndim - dst.ndim
        if extra_ndim > 0:
            # Always succeeds because the broadcast condition was checked.
            src = src.squeeze(tuple(range(extra_ndim)))

    if where is not None:
        _core.elementwise_copy(src, dst, _where=where)
        return

    if dst.size == 0:
        return

    if src_is_scalar:
        _core.elementwise_copy(src, dst)
        return

    if _can_memcpy(dst, src):
        dst.data.copy_from_async(src.data, src.nbytes)
        return

    # General path: run the element-wise copy with dst's device current,
    # restoring the previously current device afterwards.
    dst_device = dst.device
    saved_device = runtime.getDevice()
    try:
        runtime.setDevice(dst_device.id)
        if src.device != dst_device:
            # Copy made while dst's device is current (presumably so the
            # copy lives on that device -- confirm against ndarray.copy).
            src = src.copy()
        _core.elementwise_copy(src, dst)
    finally:
        runtime.setDevice(saved_device)
def _repeat(func, args, kwargs, n_repeat, name, n_warmup, max_duration,
            devices):
    """Time repeated calls of ``func`` on the CPU and on each device.

    A start/end event pair is created for every entry of ``devices`` while
    that device is current. ``func(*args, **kwargs)`` is first called
    ``n_warmup`` times untimed, after which a start event is recorded and
    synchronized on every device. Then, up to ``n_repeat`` times:

    1. the start events are recorded on their devices,
    2. ``func`` is called and its wall-clock time is taken with
       ``perf_counter``,
    3. the end events are recorded and synchronized,
    4. the per-device elapsed time between start and end event is stored,
       scaled by ``1e-3`` (presumably milliseconds to seconds).

    The loop stops early once the accumulated time of the repeats (from
    just before the call until the events are synchronized) exceeds
    ``max_duration``.

    Args:
        func: Callable to measure.
        args: Positional arguments passed to ``func``.
        kwargs: Keyword arguments passed to ``func``.
        n_repeat: Maximum number of timed calls.
        name: Passed through to the result object.
        n_warmup: Number of untimed calls made before measuring.
        max_duration: Budget on the accumulated duration of the timed
            repeats; checked after each repeat.
        devices: Device ids to record GPU timings on; iterated several
            times, so it must be re-iterable.

    Returns:
        ``_PerfCaseResult(name, ts, devices=devices)`` where ``ts`` is a
        ``float64`` array whose first row holds the CPU times and whose
        following rows hold the GPU times, one row per device.
    """

    def _record_all(events):
        # Record each event with its own device made current.
        for event, device in zip(events, devices):
            prev_device = runtime.getDevice()
            try:
                runtime.setDevice(device)
                event.record()
            finally:
                runtime.setDevice(prev_device)

    start_events = []
    end_events = []
    for device in devices:
        prev_device = runtime.getDevice()
        try:
            runtime.setDevice(device)
            start_events.append(_cupy.cuda.stream.Event())
            end_events.append(_cupy.cuda.stream.Event())
        finally:
            runtime.setDevice(prev_device)

    for _ in range(n_warmup):
        func(*args, **kwargs)

    # Wait on every device so the timed repeats below start after the
    # work queued so far has been reached by the recorded event.
    for event, device in zip(start_events, devices):
        prev_device = runtime.getDevice()
        try:
            runtime.setDevice(device)
            event.record()
        finally:
            runtime.setDevice(prev_device)
        event.synchronize()

    cpu_times = []
    gpu_times = [[] for _ in start_events]
    duration = 0
    for _ in range(n_repeat):
        _record_all(start_events)

        t1 = _time.perf_counter()
        func(*args, **kwargs)
        t2 = _time.perf_counter()
        cpu_times.append(t2 - t1)

        _record_all(end_events)

        for event, device in zip(end_events, devices):
            prev_device = runtime.getDevice()
            try:
                runtime.setDevice(device)
                event.synchronize()
            finally:
                runtime.setDevice(prev_device)

        for device_times, start, end in zip(
                gpu_times, start_events, end_events):
            device_times.append(
                _cupy.cuda.get_elapsed_time(start, end) * 1e-3)

        # The budget also counts the time spent waiting for the devices.
        duration += _time.perf_counter() - t1
        if duration > max_duration:
            break

    ts = _numpy.asarray([cpu_times] + gpu_times, dtype=_numpy.float64)
    return _PerfCaseResult(name, ts, devices=devices)
# circular imports from ._array_object import Array _check_valid_dtype(dtype) if device is not None and not isinstance(device, _Device): raise ValueError(f"Unsupported device {device!r}") if device is None: device = _Device() # current device if copy is False: # Note: copy=False is not yet implemented in np.asarray raise NotImplementedError("copy=False is not yet implemented") if isinstance(obj, Array): if dtype is not None and obj.dtype != dtype: copy = True if copy is True: prev_device = runtime.getDevice() try: runtime.setDevice(device.id) obj = Array._new(np.array(obj._array, copy=True, dtype=dtype)) finally: runtime.setDevice(prev_device) return obj if dtype is None and isinstance(obj, int) and (obj > 2**64 or obj < -(2**63)): # Give a better error message in this case. NumPy would convert this # to an object array. TODO: This won't handle large integers in lists. raise OverflowError("Integer out of bounds for array dtypes") prev_device = runtime.getDevice() try: runtime.setDevice(device.id) res = np.asarray(obj, dtype=dtype)