def _check_gpu_tensors(tensors):
    """Check that all tensors are valid and placed on distinct GPUs.

    Validates that ``tensors`` is a nonempty list no longer than the number
    of available GPUs, and that every tensor matches the first tensor's
    dtype and shape while residing on a GPU that no other tensor in the
    list occupies.

    Args:
        tensors: list of GPU tensors to validate.

    Raises:
        RuntimeError: if the input is not a nonempty list, exceeds the
            number of available GPUs, or any tensor mismatches in dtype,
            shape, or device placement.
    """
    if not tensors or not isinstance(tensors, list):
        raise RuntimeError("'tensors' must be a nonempty list.")
    if len(tensors) > nccl_util.get_num_gpus():
        raise RuntimeError("Tensor list cannot be larger than the number"
                           "of available GPUs. Got {} > {}.".format(
                               len(tensors), nccl_util.get_num_gpus()))
    t0 = tensors[0]
    dt = nccl_util.get_nccl_tensor_dtype(t0)
    s = nccl_util.get_tensor_shape(t0)
    # BUG FIX: the original only compared each tensor's device against
    # tensors[0]'s device, so two non-first tensors could silently share a
    # GPU. Track every device seen so far instead so that *all* pairs are
    # distinct. (Assumes device objects are hashable — TODO confirm.)
    seen_devices = {nccl_util.get_tensor_device(t0)}
    for t in tensors[1:]:
        # We need to check the following:
        # (1) tensor is cuda (already checked during API)
        # (2) tensor dtype
        # (3) tensor shape match
        # (4) each tensor is on a different GPU
        dtype = nccl_util.get_nccl_tensor_dtype(t)
        if dt != dtype:
            raise RuntimeError(
                "Tensors must have identical dtype. Got: '{}'.".format(dtype))
        shape = nccl_util.get_tensor_shape(t)
        if s != shape:
            raise RuntimeError(
                "Tensor must have identical shape. Got: '{}'.".format(shape))
        device = nccl_util.get_tensor_device(t)
        if device in seen_devices:
            raise RuntimeError("Tensor must be on distinct GPUs.")
        seen_devices.add(device)
def _check_inputs_compatibility_for_scatter_gather(tensor, tensor_list):
    """Check the compatibility between tensor input and tensor list inputs.

    Every operand in ``tensor_list`` must carry the same NCCL dtype and the
    same exact shape as ``tensor``; otherwise a ``RuntimeError`` is raised.

    NOTE(review): a later definition in this module reuses this exact
    function name with a different signature, shadowing this one — this
    version is effectively dead code.
    """
    if not tensor_list:
        raise RuntimeError("Got empty list of tensors.")
    expected_dtype = nccl_util.get_nccl_tensor_dtype(tensor)
    expected_shape = nccl_util.get_tensor_shape(tensor)
    for operand in tensor_list:
        # Dtype must agree across all operands.
        operand_dtype = nccl_util.get_nccl_tensor_dtype(operand)
        if operand_dtype != expected_dtype:
            raise RuntimeError("All tensor operands to scatter/gather must "
                               "have the same dtype. Got '{}' and '{}'"
                               "".format(operand_dtype, expected_dtype))
        # Note: typically CCL libraries only requires they have the same
        # number of elements;
        # Here we make it more strict -- we require exact shape match.
        if nccl_util.get_tensor_shape(operand) != expected_shape:
            raise RuntimeError("All tensor operands to scatter/gather must "
                               "have the same shape.")
def _check_inputs_compatibility_for_scatter_gather(tensors, tensor_lists):
    """Check the compatibility between tensor input and tensor list input.

    Every tensor in ``tensors`` and every tensor inside each list of
    ``tensor_lists`` must share the NCCL dtype and exact shape of
    ``tensors[0]``.

    Args:
        tensors: a list of tensors, one per participating device.
        tensor_lists: a list of tensor lists, parallel to ``tensors``.

    Raises:
        RuntimeError: if either argument is not a nonempty list, the two
            lists differ in length, or any tensor mismatches in dtype or
            shape.
    """
    if not tensors or not isinstance(tensors, list):
        raise RuntimeError(
            "The first argument 'tensors' expects a list of tensors.")
    if not tensor_lists or not isinstance(tensor_lists, list):
        raise RuntimeError(
            "The second argument 'tensor_lists' "
            "expects a list of tensor list."
        )
    # ROBUSTNESS FIX: the pairwise walk below matches tensors[i] with
    # tensor_lists[i]; the original indexed tensors[i] unchecked, so a
    # length mismatch surfaced as an opaque IndexError.
    if len(tensors) != len(tensor_lists):
        raise RuntimeError(
            "The number of tensors must match the number of tensor lists. "
            "Got {} and {}.".format(len(tensors), len(tensor_lists)))

    dtype = nccl_util.get_nccl_tensor_dtype(tensors[0])
    shape = nccl_util.get_tensor_shape(tensors[0])

    def _validate(t):
        # Raise unless `t` matches the reference dtype and shape.
        dt = nccl_util.get_nccl_tensor_dtype(t)
        if dt != dtype:
            raise RuntimeError(
                "All tensor operands to scatter/gather must "
                "have the same dtype. Got '{}' and '{}'.".format(dt, dtype)
            )
        # Note: typically CCL libraries only requires they have the same
        # number of elements; Here we make it more strict -- we require
        # exact shape match.
        s = nccl_util.get_tensor_shape(t)
        if s != shape:
            raise RuntimeError(
                "All tensor operands to scatter/gather must "
                "have the same shape. Got '{}' and '{}'.".format(s, shape)
            )

    for tensor, tensor_list in zip(tensors, tensor_lists):
        # check all tensors in `tensors` match.
        _validate(tensor)
        # check all tensors in `tensor_lists` match.
        for t in tensor_list:
            _validate(t)
def _flatten_for_scatter_gather(tensor_list, copy=False):
    """Allocate (and optionally fill) a contiguous buffer for scatter/gather.

    Builds a cupy buffer with one extra leading dimension sized by the
    number of tensors, matching the first tensor's shape and dtype.

    Args:
        tensor_list: the list of tensors to be scattered/gathered.
        copy: when True, copy each tensor of ``tensor_list`` into its slot
            of the buffer.

    Returns:
        The flattened tensor buffer.

    Raises:
        RuntimeError: if ``tensor_list`` is empty.
    """
    if not tensor_list:
        raise RuntimeError("Received an empty list.")
    first = tensor_list[0]
    # note we need a cupy dtype here.
    cupy_dtype = nccl_util.get_cupy_tensor_dtype(first)
    # Leading dimension indexes the tensors; trailing dims mirror `first`.
    flat_shape = [len(tensor_list)] + nccl_util.get_tensor_shape(first)
    flat_buffer = cupy.empty(flat_shape, dtype=cupy_dtype)
    if copy:
        for slot, source in enumerate(tensor_list):
            nccl_util.copy_tensor(flat_buffer[slot], source)
    return flat_buffer