def get_cudnn_version():
    """
    This function returns the version of cuDNN. The return value is an int
    that represents the cuDNN version. For example, if it returns 7600, the
    installed cuDNN version is 7.6.0.

    Returns:
        int: An int value that represents the cuDNN version. If cuDNN is not
        installed, it returns None.

    Examples:
        .. code-block:: python

            import paddle

            cudnn_version = paddle.device.get_cudnn_version()

    """
    global _cudnn_version
    if not core.is_compiled_with_cuda():
        return None
    if _cudnn_version is None:
        cudnn_version = int(core.cudnn_version())
        _cudnn_version = cudnn_version
        if _cudnn_version < 0:
            return None
        else:
            return cudnn_version
    else:
        return _cudnn_version
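# A minimal usage sketch (an illustration, not part of the function above):
# callers typically gate BF16 test paths on the returned integer, where
# 8100 corresponds to cuDNN 8.1.0 (major * 1000 + minor * 100 + patch).
import paddle

cudnn_version = paddle.device.get_cudnn_version()
if cudnn_version is not None and cudnn_version >= 8100:
    print("cuDNN %d.%d supports the BF16 tests below" %
          (cudnn_version // 1000, cudnn_version % 1000 // 100))
else:
    print("cuDNN missing or older than 8.1.0; BF16 tests will be skipped")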
def create_test_cudnn_bf16_class(parent):
    @unittest.skipIf(
        not core.is_compiled_with_cuda() or core.cudnn_version() < 8100,
        "core is not compiled with CUDA or cudnn version is lower than 8.1.0")
    class TestConv2DCUDNNBF16(parent):
        def get_numeric_grad(self, place, check_name):
            scope = core.Scope()
            self._check_grad_helper()
            op = create_op(scope, self.op_type, self.inputs, self.outputs,
                           self.attrs)
            return get_numeric_gradient(place, scope, op, self.inputs_fp32,
                                        check_name, ['Output'])

        def init_kernel_type(self):
            self.use_cudnn = True
            self.no_need_check_grad = True
            self.dtype = np.uint16

        def test_check_output(self):
            place = core.CUDAPlace(0)
            self.check_output_with_place(place, atol=1e-2)

        def test_check_grad_no_filter(self):
            place = core.CUDAPlace(0)
            numeric_grads = self.get_numeric_grad(place, 'Input')
            self.check_grad_with_place(
                place, ['Input'],
                'Output',
                no_grad_set=set(['Filter']),
                user_defined_grads=[numeric_grads])

        def test_check_grad_no_input(self):
            place = core.CUDAPlace(0)
            numeric_grads = self.get_numeric_grad(place, 'Filter')
            self.check_grad_with_place(
                place, ['Filter'],
                'Output',
                no_grad_set=set(['Input']),
                user_defined_grads=[numeric_grads])

    cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16")
    TestConv2DCUDNNBF16.__name__ = cls_name
    globals()[cls_name] = TestConv2DCUDNNBF16
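# Hedged usage sketch: the factory above is meant to be called at module level
# with an existing FP32 conv2d test case as `parent`; `TestConv2DOp` below is
# only an assumed example name, not necessarily defined in this file.
#
#     create_test_cudnn_bf16_class(TestConv2DOp)
#     assert "TestConv2DOp_CUDNNBF16" in globals()
#
# unittest discovery then collects the generated class like a hand-written one.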
def test_main(self):
    if (not core.is_compiled_with_cuda()) or (core.cudnn_version() < 8100):
        return
    x_np = np.random.random([10, 20]).astype('float32')
    weight_np = np.random.random([20]).astype('float32')
    bias_np = np.random.random([20]).astype('float32')

    y_np_1, x_g_np_1, w_g_np_1, b_g_np_1 = self.check_main(
        x_np, weight_np, bias_np, 'float32')
    y_np_2, x_g_np_2, w_g_np_2, b_g_np_2 = self.check_main(
        x_np, weight_np, bias_np, 'bfloat16')

    def assert_equal(x, y):
        self.assertTrue(np.allclose(x, y, atol=1.e-1))

    assert_equal(y_np_1, y_np_2)
    assert_equal(x_g_np_1, x_g_np_2)
    assert_equal(w_g_np_1, w_g_np_2)
    assert_equal(b_g_np_1, b_g_np_2)
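# An aside on the loose atol=1.e-1 used above (an illustration, not part of
# the test): bfloat16 stores only 7 explicit mantissa bits, so a single
# truncation to bf16 can already cost up to about 2**-7 in relative precision,
# and the error accumulates through the layer_norm forward and backward passes.
# A minimal round-trip sketch, assuming numpy is imported as np as elsewhere:
x32 = np.float32(1.2345678)
bf16_bits = np.uint16(x32.view(np.uint32) >> np.uint32(16))  # keep top 16 bits
x_back = (np.uint32(bf16_bits) << np.uint32(16)).view(np.float32)
assert abs(x_back - x32) / abs(x32) < 2.0 ** -7  # truncation error bound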
def test_check_grad_ingore_x(self):
    self.check_grad(
        ['Y'], 'Out', max_relative_error=0.005, no_grad_set=set("X"))

def test_check_grad_ingore_y(self):
    self.check_grad(
        ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))


@unittest.skipIf(
    core.is_compiled_with_cuda() and core.cudnn_version() < 8100,
    "run test when GPU is available and the minimum cudnn version is 8.1.0.")
class TestElementwiseBF16Op(OpTest):
    def setUp(self):
        self.op_type = "elementwise_max"
        self.python_api = paddle.maximum
        self.dtype = np.uint16
        # If x and y have the same value, the max() is not differentiable.
        # So we generate test data by the following method
        # to avoid them being too close to each other.
        x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float32)
        sgn = np.random.choice([-1, 1], [13, 17]).astype(np.float32)
        y = x + sgn * np.random.uniform(0.1, 1, [13, 17]).astype(np.float32)
        self.inputs = {
            'X': convert_float_to_uint16(x),
            'Y': convert_float_to_uint16(y)
        }
def test_check_output(self):
    place = core.CUDAPlace(0)
    self.check_output_with_place(
        place, check_dygraph=(self.use_mkldnn == False))

def test_check_grad(self):
    place = core.CUDAPlace(0)
    self.check_grad_with_place(
        place, ["X"],
        "Out",
        numeric_grad_delta=0.05,
        check_dygraph=(self.use_mkldnn == False))


@unittest.skipIf(
    not core.is_compiled_with_cuda() or core.cudnn_version() < 8100,
    "core is not compiled with CUDA or cudnn version is lower than 8.1.0")
class TestSoftmaxBF16CUDNNOp(TestSoftmaxBF16Op):
    def init_cudnn(self):
        return True


class TestSoftmaxAPI(unittest.TestCase):
    def setUp(self):
        self.place = paddle.CUDAPlace(
            0) if core.is_compiled_with_cuda() else paddle.CPUPlace()
        self.x_np = np.random.uniform(-1., 1., [2, 3, 4, 5]).astype('float32')
        self.out_ref = np.apply_along_axis(stable_softmax, -1, self.x_np)
        self.executed_api()

    def executed_api(self):