def forward(self, inputs):
    x = self.linear1(inputs)
    x = F.relu(x)
    if paddle.rand([1]) > 0.5:
        x = self.linear2(x)
        x = F.relu(x)
    x = self.linear3(x)
    return x
def test_forward_shape_full():
    @paddle.jit.to_static
    def full1(inputs):
        return paddle.full(paddle.shape(inputs), 3.14)

    @paddle.jit.to_static
    def full2(inputs):
        return paddle.full(paddle.shape(inputs), 1.0, dtype=inputs.dtype)

    input_shape = [1, 3, 10, 10]
    input_data = paddle.rand(input_shape, dtype="float32")
    verify_model(full1, input_data=[input_data])
    verify_model(full2, input_data=[input_data])
def sample_from_softmax(self, logits, use_softmax_sample=True):
    if use_softmax_sample:
        # uniform_noise = paddle.uniform(logits.shape, dtype="float32", min=0, max=1)
        uniform_noise = paddle.rand(logits.shape, dtype="float32")
        gumbel_noise = -paddle.log(-paddle.log(uniform_noise + 1e-9) + 1e-9)
    else:
        gumbel_noise = paddle.zeros_like(logits)
    # softmax_sample equal to sampled_tokids.unsqueeze(-1)
    softmax_sample = paddle.argmax(F.softmax(logits + gumbel_noise), axis=-1)
    # one hot
    return F.one_hot(softmax_sample, logits.shape[-1])
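# Aside (my own sanity sketch, not part of the source above): the function relies on
# the Gumbel-max trick, i.e. argmax(logits + g) with g ~ Gumbel(0, 1) is a draw from
# softmax(logits). A quick empirical check under that assumption:
import paddle
import paddle.nn.functional as F

logits = paddle.to_tensor([[2.0, 1.0, 0.1]])
counts = paddle.zeros([3])
for _ in range(2000):
    u = paddle.rand(logits.shape, dtype="float32")
    g = -paddle.log(-paddle.log(u + 1e-9) + 1e-9)               # Gumbel(0, 1) noise
    counts += F.one_hot(paddle.argmax(logits + g, axis=-1), 3).squeeze(0)
print(counts / 2000)        # should be close to ...
print(F.softmax(logits))    # ... the softmax probabilities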
def test_forward_ones_like():
    @paddle.jit.to_static
    def ones_like1(inputs):
        return paddle.ones_like(inputs)

    @paddle.jit.to_static
    def ones_like2(inputs):
        return paddle.ones_like(inputs, dtype="int32")

    input_shape = [1, 3, 10, 10]
    input_data = paddle.rand(input_shape, dtype="float32")
    verify_model(ones_like1, input_data=input_data)
    verify_model(ones_like2, input_data=input_data)
def test_concrete_program(self):
    with fluid.dygraph.guard(fluid.CPUPlace()):
        # usage 1
        foo_1 = paddle.jit.to_static(
            foo_func,
            input_spec=[InputSpec([10], name='x'), InputSpec([10], name='y')])
        self.assertTrue(isinstance(foo_1.concrete_program, ConcreteProgram))

        # usage 2
        foo_2 = paddle.jit.to_static(foo_func)
        out = foo_2(paddle.rand([10]), paddle.rand([10]))
        self.assertTrue(isinstance(foo_2.concrete_program, ConcreteProgram))

        # raise error
        foo_3 = paddle.jit.to_static(foo_func)
        with self.assertRaises(ValueError):
            foo_3.concrete_program
def build_input(input_size, dtypes):
    if isinstance(input_size, list) and all(
            isinstance(i, numbers.Number) for i in input_size):
        if isinstance(dtypes, list):
            dtype = dtypes[0]
        else:
            dtype = dtypes
        return paddle.cast(paddle.rand(list(input_size)), dtype)
    if isinstance(input_size, dict):
        inputs = {}
        if isinstance(dtypes, list):
            dtype = dtypes[0]
        else:
            dtype = dtypes
        for key, value in input_size.items():
            inputs[key] = paddle.cast(paddle.rand(list(value)), dtype)
        return inputs
    if isinstance(input_size, list):
        return [
            build_input(i, dtype) for i, dtype in zip(input_size, dtypes)
        ]
def drop_path(x, drop_prob=0., training=False):
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    B = paddle.shape(x)[0]
    ndim = len(paddle.shape(x))
    shape = (B, ) + (1, ) * (ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    random_tensor = random_tensor.floor()  # binarize
    output = x / keep_prob * random_tensor
    return output
def drop_path(x, drop_prob=0., training=False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... """ if drop_prob == 0. or not training: return x keep_prob = paddle.to_tensor(1 - drop_prob) shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) random_tensor = paddle.floor(random_tensor) # binarize output = x.divide(keep_prob) * random_tensor return output
def test_starts_ends_is_tensor(self):
    with paddle.fluid.dygraph.guard():
        a = paddle.rand(shape=[4, 5, 6], dtype='float32')
        axes = [0, 1, 2]
        starts = [-3, 0, 2]
        ends = [3, 2, 4]
        a_1 = paddle.slice(a,
                           axes=axes,
                           starts=paddle.to_tensor(starts, dtype='int32'),
                           ends=paddle.to_tensor(ends, dtype='int32'))
        a_2 = paddle.slice(a, axes=axes, starts=starts, ends=ends)
        self.assertTrue(np.array_equal(a_1.numpy(), a_2.numpy()))
def _run(self, to_static):
    self._init_seed()
    if to_static:
        self.net = paddle.jit.to_static(self.net)
    x = paddle.rand([16, 10], 'float32')
    out = self.net(x)
    if to_static:
        load_out = self._test_load(self.net, x)
        self.assertTrue(np.allclose(load_out, out),
                        msg='load_out is {}\nst_out is {}'.format(load_out, out))
    return out
def test_jit_save_load_static_function(self):
    @paddle.jit.to_static
    def fun(inputs):
        return paddle.tanh(inputs)

    path = 'test_jit_save_load_function_1/func'
    inps = paddle.rand([3, 6])
    origin = fun(inps)

    paddle.jit.save(fun, path)
    load_func = paddle.jit.load(path)

    load_result = load_func(inps)
    self.assertTrue((load_result - origin).abs().max() < 1e-10)
def test_apply_init_weight(self):
    with fluid.dygraph.guard():
        net = LeNetDygraph()
        net.eval()

        net_layers = nn.Sequential(*list(net.children()))
        net_layers.eval()

        x = paddle.rand([2, 1, 28, 28])
        y1 = net(x)
        y2 = net_layers(x)
        np.testing.assert_allclose(y1.numpy(), y2.numpy())
def test_forward_transpose():
    class Transpose(nn.Layer):
        def __init__(self, perm):
            super(Transpose, self).__init__()
            self.perm = perm

        @paddle.jit.to_static
        def forward(self, inputs):
            inputs = inputs + inputs.size()
            return paddle.transpose(inputs, perm=self.perm)

    input_data = paddle.rand([1, 3, 5, 4, 3], dtype="float32")
    verify_model(Transpose([0, 1, 2, 3, 4]), input_data=input_data)
    verify_model(Transpose([4, 3, 2, 0, 1]), input_data=input_data)
def test_dygraph(self):
    for place in self.places:
        with fluid.dygraph.guard(place):
            in1 = paddle.rand(shape=(3, 3, 40, 40), dtype="float32")
            in2 = paddle.transpose(in1, [0, 2, 3, 1])

            m1 = paddle.nn.LocalResponseNorm(size=5, data_format='NCHW')
            m2 = paddle.nn.LocalResponseNorm(size=5, data_format='NHWC')

            res1 = m1(in1)
            res2 = m2(in2)

            res2_tran = np.transpose(res2.numpy(), (0, 3, 1, 2))
            self.assertTrue(np.allclose(res1.numpy(), res2_tran))
def test_dice_loss(self):
    input_ = paddle.rand([2, 3, num_classes])
    label_ = paddle.randint(0, num_classes, [2, 3, 1], dtype=paddle.int64)

    input_np, label_np = input_.numpy(), label_.numpy()
    eye_np = np.eye(num_classes)
    label_np = np.float32(eye_np[np.squeeze(label_np)])
    input_np = np.reshape(input_np, [2, -1])
    label_np = np.reshape(label_np, [2, -1])
    intersection_np = np.sum(input_np * label_np, axis=-1)
    union_np = input_np.sum(-1) + label_np.sum(-1)
    dice_np = np.mean(1 - 2 * intersection_np / (union_np + eps))

    dice_paddle = nn.dice_loss(input_, label_, eps)
    self.assertTrue(np.isclose(dice_np, dice_paddle.numpy()).all())
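# Side note (my own worked check, not part of the test above): ignoring eps, the dice
# loss per sample is 1 - 2*sum(p*y) / (sum(p) + sum(y)); e.g. for p = [0.8, 0.2] against
# the one-hot label y = [1, 0] it is 1 - 2*0.8 / (1.0 + 1.0) = 0.2.
import numpy as np

p = np.array([0.8, 0.2], dtype=np.float32)   # predicted class probabilities
y = np.array([1.0, 0.0], dtype=np.float32)   # one-hot ground truth
print(1 - 2 * np.sum(p * y) / (np.sum(p) + np.sum(y)))   # 0.2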
def func_test_async_read_only_1dim(self):
    src = paddle.rand([40], dtype="float32").pin_memory()
    dst = paddle.empty([40], dtype="float32")
    buffer_ = paddle.empty([20]).pin_memory()
    with cuda.stream_guard(self.stream):
        if _in_legacy_dygraph():
            core.async_read(src, dst, self.index, buffer_, self.empty,
                            self.empty)
        else:
            core.eager.async_read(src, dst, self.index, buffer_, self.empty,
                                  self.empty)
    array1 = paddle.gather(src, self.index)
    array2 = dst[:len(self.index)]
    self.assertTrue(np.allclose(array1.numpy(), array2.numpy()))
def build_program(self):
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        w = paddle.rand([10, 3])
        ids = paddle.static.data(name="id", shape=[5], dtype='int64')
        data = paddle.static.data(name="data", shape=[3], dtype='float32')
        emb = paddle.nn.functional.embedding(x=ids,
                                             weight=w,
                                             sparse=False,
                                             name="embedding")
        emb = emb + data
    return main_program, startup_program, emb
def test_multiple_gpus(self):
    self.trainer_id = dist.get_rank()
    with _test_eager_guard():
        self.pg = dist.init_parallel_env()

        model_a = SimpleNet(self.trainer_id)
        model_b = SimpleNet(self.trainer_id)

        state_dict = model_a.state_dict()
        model_b.set_state_dict(state_dict)

        model_a = paddle.DataParallel(model_a,
                                      find_unused_parameters=True,
                                      group=self.pg)
        model_b = paddle.DataParallel(model_b,
                                      find_unused_parameters=True,
                                      group=self.pg)

        ones_input = paddle.ones(shape=(batch, in_dim))
        ones_input.stop_gradient = True

        w1_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')
        w2_grad_sum = np.zeros((in_dim, out_dim), dtype='float32')

        for step_id in range(5):
            random_input = paddle.rand(shape=(batch, in_dim))
            random_input.stop_gradient = True

            if step_id % 2 == 0:
                out_a = model_a(random_input)
                out_b = model_b(random_input)
            else:
                out_a = model_a(ones_input)
                out_b = model_b(ones_input)

            out_a.sum().backward()
            out_b.sum().backward()

            self.check_gradient(model_a.parameters())
            self.check_gradient(model_b.parameters())

            # test acc gradient
            w1_grad_sum = self.check_acc(model_a._layers.w1.grad, w1_grad_sum,
                                         model_b._layers.w1.grad)
            w2_grad_sum = self.check_acc(model_a._layers.w2.grad, w2_grad_sum,
                                         model_b._layers.w2.grad)

            model_a.clear_gradients()
def test_forward_expand():
    @paddle.jit.to_static
    def expand1(inputs):
        return paddle.expand(inputs, shape=[2, 128])

    @paddle.jit.to_static
    def expand2(inputs):
        return paddle.expand(inputs, shape=[2, 1, 4, 16])

    @paddle.jit.to_static
    def expand3(inputs):
        return paddle.expand(inputs, shape=[2, 1, 3, 7, 7])

    @paddle.jit.to_static
    def expand4(inputs):
        shape = paddle.to_tensor(np.array([2, 128]).astype("int32"))
        return paddle.expand(inputs, shape=shape)

    @paddle.jit.to_static
    def expand5(inputs):
        shape = paddle.to_tensor(np.array([2, 1, 4, 16]).astype("int32"))
        return paddle.expand(inputs, shape=shape)

    @paddle.jit.to_static
    def expand6(inputs):
        shape = paddle.to_tensor(np.array([2, 1, 3, 7, 7]).astype("int32"))
        return paddle.expand(inputs, shape=shape)

    data = paddle.rand([128], dtype="float32")
    verify_model(expand1, input_data=[data])
    verify_model(expand4, input_data=[data])
    data = paddle.rand([4, 16], dtype="float32")
    verify_model(expand2, input_data=[data])
    verify_model(expand5, input_data=[data])
    data = paddle.rand([1, 3, 7, 7], dtype="float32")
    verify_model(expand3, input_data=[data])
    verify_model(expand6, input_data=[data])
def test_run(self):
    use_cuda = False
    with fluid.dygraph.guard():
        rand(shape=[3, 4])

        dim_1 = fluid.layers.fill_constant([1], "int64", 3)
        dim_2 = fluid.layers.fill_constant([1], "int32", 5)
        rand(shape=[dim_1, dim_2])

        var_shape = fluid.dygraph.to_variable(np.array([3, 4]))
        rand(var_shape)
def test_forward_rnn():
    class RNN(nn.Layer):
        def __init__(self, api_name, input_size, hidden_size, num_layers,
                     direction="forward"):
            super(RNN, self).__init__()
            rnn_func = getattr(paddle.nn, api_name, None)
            self.rnn = rnn_func(input_size, hidden_size, num_layers,
                                direction=direction)

        @paddle.jit.to_static
        def forward(self, inputs, prev_h):
            y, h = self.rnn(inputs, prev_h)
            return y

    input_size, hidden_size, num_layers = 8, 16, 2
    input_shape = [4, 5, 8]
    input_data = paddle.rand(input_shape, dtype="float32")
    for api_name in ("SimpleRNN", "GRU"):
        prev_h = paddle.rand([4, 4, 16], dtype="float32")
        verify_model(
            RNN(api_name, input_size, hidden_size, num_layers,
                direction="bidirectional"),
            input_data=[input_data, prev_h],
        )
        prev_h = paddle.rand([2, 4, 16], dtype="float32")
        verify_model(RNN(api_name, input_size, hidden_size, num_layers),
                     input_data=[input_data, prev_h])
def cal_gradient_penalty(netD, real_data, fake_data, device, type='mixed',
                         constant=1.0, lambda_gp=10.0):
    """Calculate the gradient penalty loss, used in WGAN-GP paper https://arxiv.org/abs/1704.00028

    Arguments:
        netD (network)        -- discriminator network
        real_data (tensor)    -- real images
        fake_data (tensor)    -- generated images from the generator
        device (str)          -- GPU / CPU
        type (str)            -- if we mix real and fake data or not [real | fake | mixed].
        constant (float)      -- the constant used in formula (||gradient||_2 - constant)^2
        lambda_gp (float)     -- weight for this loss

    Returns the gradient penalty loss
    """
    if lambda_gp > 0.0:
        if type == 'real':  # either use real images, fake images, or a linear interpolation of two.
            interpolatesv = real_data
        elif type == 'fake':
            interpolatesv = fake_data
        elif type == 'mixed':
            alpha = paddle.rand([real_data.shape[0], 1])
            alpha = paddle.expand(
                alpha,
                [real_data.shape[0],
                 np.prod(real_data.shape) // real_data.shape[0]])
            alpha = paddle.reshape(alpha, real_data.shape)
            interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
        else:
            raise NotImplementedError('{} not implemented'.format(type))
        interpolatesv.stop_gradient = False
        disc_interpolates = netD(interpolatesv)
        gradients = paddle.grad(
            outputs=disc_interpolates,
            inputs=interpolatesv,
            grad_outputs=paddle.ones(disc_interpolates.shape),
            create_graph=True,
            retain_graph=True,
            only_inputs=True)
        gradients = paddle.reshape(gradients[0],
                                   [real_data.shape[0], -1])  # flat the data
        gradient_penalty = paddle.mean(
            (paddle.norm(gradients + 1e-16, 2, 1) - constant) **
            2) * lambda_gp  # added eps
        return gradient_penalty, gradients
    else:
        return 0.0, None
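# Worked check (my own sketch under stated assumptions, not from either implementation in
# this file): for a linear critic D(x) = x @ w the input gradient is w everywhere, so the
# WGAN-GP penalty reduces to lambda_gp * (||w||_2 - constant)^2 at any interpolation point.
import paddle

w = paddle.to_tensor([[3.0], [4.0]])                     # ||w||_2 = 5
x = paddle.rand([6, 2])
x.stop_gradient = False
d = paddle.matmul(x, w)
grads = paddle.grad(outputs=d,
                    inputs=x,
                    grad_outputs=paddle.ones_like(d),
                    create_graph=False)[0]
penalty = paddle.mean((paddle.norm(grads, p=2, axis=1) - 1.0) ** 2) * 10.0
print(penalty)                                           # 10 * (5 - 1)^2 = 160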
def test_forward_addmm():
    class Addmm(nn.Layer):
        def __init__(self, alpha=1.0, beta=1.0):
            super(Addmm, self).__init__()
            self.alpha = alpha
            self.beta = beta

        @paddle.jit.to_static
        def forward(self, inputs, x, y):
            return paddle.addmm(inputs, x, y, alpha=self.alpha, beta=self.beta)

    input_shapes = [[10, 10], [1, 1], [7, 1]]
    x_shapes = [[10, 3], [5, 6], [7, 7]]
    y_shapes = [[3, 10], [6, 2], [7, 3]]
    for i in range(len(input_shapes)):
        input_data = paddle.rand(input_shapes[i], dtype="float32")
        x_data = paddle.rand(x_shapes[i], dtype="float32")
        y_data = paddle.rand(y_shapes[i], dtype="float32")
        verify_model(Addmm(), input_data=[input_data, x_data, y_data])
        verify_model(Addmm(0.5, 0.3), input_data=[input_data, x_data, y_data])
def test_jit_save_load_function_input_spec(self):
    @paddle.jit.to_static(input_spec=[
        InputSpec(shape=[None, 6], dtype='float32', name='x'),
    ])
    def fun(inputs):
        return paddle.nn.functional.relu(inputs)

    path = 'test_jit_save_load_function_2/func'
    inps = paddle.rand([3, 6])
    origin = fun(inps)

    paddle.jit.save(fun, path)
    load_func = paddle.jit.load(path)

    load_result = load_func(inps)
    self.assertTrue((load_result - origin).abs().max() < 1e-10)
def test_inplace(self):
    paddle.disable_static()
    with paddle.fluid.dygraph.guard():
        paddle.seed(100)
        a = paddle.rand(shape=[1, 4])
        a.stop_gradient = False

        b = a[:]
        c = b
        b[paddle.to_tensor(0)] = 1.0

        self.assertTrue(id(b) == id(c))
        self.assertTrue(np.array_equal(b.numpy(), c.numpy()))
        self.assertEqual(b.inplace_version, 1)
    paddle.enable_static()
def test_forward_layer_norm():
    @paddle.jit.to_static
    def layer_norm(inputs, weight, bias):
        return nn.functional.layer_norm(inputs,
                                        inputs.shape[-1],
                                        weight=weight,
                                        bias=bias)

    class LayerNorm(nn.Layer):
        def __init__(self):
            super(LayerNorm, self).__init__()
            data_shape = [10]
            self.layer_norm = nn.LayerNorm(data_shape)

        @paddle.jit.to_static
        def forward(self, inputs):
            return self.layer_norm(inputs)

    input_shape = [1, 3, 10, 10]
    input_data = paddle.rand(input_shape, dtype="float32")
    weight = paddle.rand([10], dtype="float32")
    bias = paddle.rand([10], dtype="float32")
    verify_model(layer_norm, input_data=[input_data, weight, bias])
    verify_model(LayerNorm(), input_data=input_data)
def data_transform(config, X):
    if config.data.uniform_dequantization:
        X = X / 256.0 * 255.0 + paddle.rand(X.shape) / 256.0
    if config.data.gaussian_dequantization:
        X = X + paddle.randn(X.shape) * 0.01

    if config.data.rescaled:
        X = 2 * X - 1.0
    elif config.data.logit_transform:
        X = logit_transform(X)

    if hasattr(config, "image_mean"):
        return X - config.image_mean.unsqueeze(0)
    return X
def test_cuda_stream_synchronize(self):
    if paddle.is_compiled_with_cuda():
        s = paddle.device.cuda.Stream()
        e1 = paddle.device.cuda.Event(True, False, False)
        e2 = paddle.device.cuda.Event(True, False, False)

        e1.record(s)
        e1.query()
        tensor1 = paddle.to_tensor(paddle.rand([1000, 1000]))
        tensor2 = paddle.matmul(tensor1, tensor1)
        s.synchronize()
        e2.record(s)
        e2.synchronize()

        self.assertTrue(s.query())
def cal_gradient_penalty(netD,
                         real_data,
                         fake_data,
                         edge_data=None,
                         type='mixed',
                         constant=1.0,
                         lambda_gp=10.0):
    if lambda_gp > 0.0:
        if type == 'real':  # either use real images, fake images, or a linear interpolation of two.
            interpolatesv = real_data
        elif type == 'fake':
            interpolatesv = fake_data
        elif type == 'mixed':
            alpha = paddle.rand((real_data.shape[0], 1))
            alpha = paddle.expand(
                alpha, [1, np.prod(real_data.shape) // real_data.shape[0]])
            alpha = paddle.reshape(alpha, real_data.shape)
            interpolatesv = alpha * real_data + ((1 - alpha) * fake_data)
        else:
            raise NotImplementedError('{} not implemented'.format(type))
        # interpolatesv.requires_grad_(True)
        interpolatesv.stop_gradient = False
        real_data.stop_gradient = True
        fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1)
        disc_interpolates = netD(fake_AB)

        # FIXME: use paddle.ones
        outs = paddle.fill_constant(disc_interpolates.shape,
                                    disc_interpolates.dtype, 1.0)
        gradients = paddle.imperative.grad(
            outputs=disc_interpolates,
            inputs=fake_AB,
            grad_outputs=outs,  # paddle.ones(list(disc_interpolates.shape)),
            create_graph=True,
            retain_graph=True,
            only_inputs=True,
            # no_grad_vars=set(netD.parameters())
        )

        gradients = paddle.reshape(gradients[0],
                                   [real_data.shape[0], -1])  # flat the data
        gradient_penalty = paddle.reduce_mean(
            (paddle.norm(gradients + 1e-16, 2, 1) - constant) **
            2) * lambda_gp  # added eps
        return gradient_penalty, gradients
    else:
        return 0.0, None
def test_flatten_op_transposer(self):
    if not self.use_autoune():
        return
    conv = paddle.nn.Conv2D(3, 8, (3, 3))
    flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2)
    data = paddle.rand([1, 3, 16, 14])
    with paddle.amp.auto_cast(level="O2"):
        conv_out = conv(data)
        # conv_out.shape = [1, 14, 12, 8] with NHWC
        # layout tuner will transpose conv_out to
        # [1, 8, 14, 12] with NCHW before the following flatten op
        # because it flattens the C and H dimensions.
        out = flatten(conv_out)

    self.assertEqual(conv_out.shape, [1, 14, 12, 8])
    self.assertEqual(out.shape, [1, 112, 12])