def test_shuffle_and_random_permutation(self):
    x = np.arange(24).reshape([2, 3, 4])

    # shuffle
    for axis in range(-len(x.shape), len(x.shape)):
        equal_count = 0
        for k in range(100):
            t = T.random.shuffle(T.from_numpy(x), axis=axis)
            if np.all(np.equal(T.to_numpy(t), x)):
                equal_count += 1
            assert_equal(np.sort(T.to_numpy(t), axis=axis), x)
        self.assertLess(equal_count, 100)

    # random_permutation
    for dtype, device in product(int_dtypes, [None, T.CPU_DEVICE]):
        for n in [0, 1, 5]:
            x = np.arange(n)
            equal_count = 0
            for k in range(100):
                t = T.random.random_permutation(n, dtype=dtype,
                                                device=device)
                self.assertEqual(T.get_dtype(t), dtype)
                self.assertEqual(T.get_device(t),
                                 device or T.current_device())
                if np.all(np.equal(T.to_numpy(t), x)):
                    equal_count += 1
                assert_equal(np.sort(T.to_numpy(t)), x)
            if n > 1:
                self.assertLess(equal_count, 100)
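A note on why `assertLess(equal_count, 100)` is a safe statistical bound: a uniform random permutation of n distinct items equals the identity with probability 1/n!, so for n > 1 the chance that all 100 draws leave the input unchanged is (1/n!)^100. A minimal standalone sketch of this reasoning (not part of the test suite):

import math

# Probability that 100 independent uniform permutations of n items
# are all the identity permutation: (1 / n!) ** 100.
for n in [2, 5]:
    p_identity = 1.0 / math.factorial(n)
    p_all_100 = p_identity ** 100
    print(n, p_identity, p_all_100)  # vanishingly small for n >= 2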
def test_xavier_initializer(self):
    for dtype, initializer, mode in product(
            float_dtypes,
            (tk.init.xavier_normal, tk.init.xavier_uniform),
            (None, 'fan_in', 'fan_out'),
    ):
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))
        mode_arg = {'mode': mode} if mode is not None else {}

        # xavier
        fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
        xavier_std = np.sqrt(2.0 / float(fan_in + fan_out))
        tk.init.apply_initializer(weight, initializer, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / xavier_std / np.sqrt(n_samples))

        # xavier with custom gain and fan_in/fan_out
        fan_in, fan_out = 23, 17
        init_gain = 1.5
        xavier_std = init_gain * np.sqrt(2.0 / float(fan_in + fan_out))
        tk.init.apply_initializer(weight, initializer,
                                  fan_in_and_fan_out=(fan_in, fan_out),
                                  gain=init_gain, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / xavier_std / np.sqrt(n_samples))
def test_seed(self):
    T.random.seed(1234)
    x = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.)))
    y = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.)))
    self.assertFalse(np.allclose(x, y))

    T.random.seed(1234)
    z = T.to_numpy(T.random.normal(T.as_tensor(0.), T.as_tensor(1.)))
    assert_allclose(x, z)
def test_negative_sampling(self):
    def sigmoid(x):
        return np.where(
            x >= 0.,
            1. / (1. + np.exp(-x)),
            np.exp(x) / (1. + np.exp(x)),
        )

    def log_sigmoid(x):
        return np.log(sigmoid(x))

    def f(pos_logits, neg_logits, reduction='none', negative=False):
        o = log_sigmoid(pos_logits) + \
            np.sum(log_sigmoid(-neg_logits), axis=-1)
        if reduction == 'sum':
            o = np.sum(o)
        elif reduction == 'mean':
            o = np.mean(o)
        if negative:
            o = -o
        return o

    for k in [1, 5]:
        for reduction, negative in product(
                (None, 'none', 'sum', 'mean'),
                (None, True, False),
        ):
            pos_logits = T.random.randn([10])
            neg_logits = T.random.randn([10, k])
            kwargs = {'negative': negative} if negative is not None else {}
            if reduction is not None:
                kwargs['reduction'] = reduction
            x = T.losses.negative_sampling(pos_logits, neg_logits, **kwargs)
            y = f(T.to_numpy(pos_logits), T.to_numpy(neg_logits), **kwargs)
            assert_allclose(x, y, atol=1e-4, rtol=1e-6)

    # test errors
    with pytest.raises(Exception, match='`pos_logits` must be 1d, '
                                        '`neg_logits` must be 2d'):
        _ = T.losses.negative_sampling(neg_logits, neg_logits)
    with pytest.raises(Exception, match='`pos_logits` must be 1d, '
                                        '`neg_logits` must be 2d'):
        _ = T.losses.negative_sampling(pos_logits, pos_logits)
    with pytest.raises(Exception, match='`pos_logits` must be 1d, '
                                        '`neg_logits` must be 2d'):
        _ = T.losses.negative_sampling(pos_logits[:-1], neg_logits)
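The reference `sigmoid` above is branched with `np.where` for numerical stability: `exp(-x)` overflows for large negative x, so an algebraically equivalent form is selected by sign. The loss itself is log σ(pos) + Σ_k log σ(-neg_k) per sample. A small standalone check of the stable log-sigmoid form (plain numpy, independent of the test helpers):

import numpy as np

def stable_log_sigmoid(x):
    # log(sigmoid(x)) without overflow:
    # for x >= 0: -log(1 + exp(-x)); for x < 0: x - log(1 + exp(x))
    return np.where(x >= 0.,
                    -np.log1p(np.exp(-np.abs(x))),
                    x - np.log1p(np.exp(-np.abs(x))))

x = np.array([-1000., -1., 0., 1., 1000.])
print(stable_log_sigmoid(x))  # finite everywhere, no overflow warnings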
def test_dropout(self):
    n_samples = 10000
    for spatial_ndims in (0, 1, 2, 3):
        cls = getattr(tk.layers,
                      ('Dropout' if not spatial_ndims
                       else f'Dropout{spatial_ndims}d'))
        layer = cls(p=0.3)
        self.assertIn('p=0.3', repr(layer))
        self.assertIn('Dropout', repr(layer))
        layer = tk.layers.jit_compile(layer)

        x = 1. + T.random.rand(
            make_conv_shape([1], n_samples, [2, 2, 2][:spatial_ndims]))

        # ---- train mode ----
        set_train_mode(layer, True)
        y = layer(x)

        # check: each channel should be either all-zero or all-nonzero
        spatial_axis = tuple(get_spatial_axis(spatial_ndims))
        all_zero = np.all(T.to_numpy(y) == 0, axis=spatial_axis,
                          keepdims=True)
        no_zero = np.all(T.to_numpy(y) != 0, axis=spatial_axis,
                         keepdims=True)
        self.assertTrue(np.all(np.logical_or(all_zero, no_zero)))

        # check: the probability of being dropped (zeroed)
        self.assertLessEqual(
            np.abs(np.mean(all_zero) - 0.3),
            5.0 / np.sqrt(n_samples) * 0.3 * 0.7  # 5-sigma
        )

        # check: the value
        assert_allclose(y, (T.to_numpy(x) * no_zero) / 0.7,
                        rtol=1e-4, atol=1e-6)

        # ---- eval mode ----
        set_train_mode(layer, False)
        y = layer(x)
        self.assertTrue(np.all(T.to_numpy(y) != 0))
        assert_allclose(y, x, rtol=1e-4, atol=1e-6)

        set_train_mode(layer, True)
        set_eval_mode(layer)
        y = layer(x)
        self.assertTrue(np.all(T.to_numpy(y) != 0))
        assert_allclose(y, x, rtol=1e-4, atol=1e-6)
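The `/ 0.7` in the value check reflects inverted dropout: surviving units are scaled by 1 / (1 - p) at train time so that E[y] == x and no rescaling is needed at eval time. A minimal numpy illustration of that identity (a standalone sketch, not tied to tk.layers):

import numpy as np

rng = np.random.default_rng(0)
p = 0.3
x = 1. + rng.random(100000)
keep = (rng.random(x.shape) >= p).astype(x.dtype)
y = x * keep / (1. - p)  # inverted dropout: scale kept units by 1/(1-p)
print(np.mean(y) / np.mean(x))  # ~= 1.0, expectation is preserved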
def test_GlobalAvgPool(self):
    for spatial_ndims, keepdims in product((1, 2, 3), (True, False)):
        if T.IS_CHANNEL_LAST:
            reduce_axis = tuple(range(-spatial_ndims - 1, -1))
        else:
            reduce_axis = tuple(range(-spatial_ndims, 0))

        def fn(arr):
            return np.mean(arr, axis=reduce_axis, keepdims=keepdims)

        layer_factory = getattr(tk.layers,
                                f'GlobalAvgPool{spatial_ndims}d')
        layer = layer_factory(keepdims=keepdims)
        self.assertEqual(
            repr(layer),
            f'GlobalAvgPool{spatial_ndims}d(keepdims={keepdims})')
        layer = tk.layers.jit_compile(layer)

        x = T.random.randn(
            make_conv_shape([4, 5], 6, [7, 8, 9][:spatial_ndims]))
        assert_allclose(layer(x), fn(T.to_numpy(x)), rtol=1e-4, atol=1e-6)

        with pytest.raises(Exception, match=r'`rank\(input\)` is too low'):
            _ = layer(T.random.randn([5]))
def test_iwae(self):
    assert_allclose_ = functools.partial(assert_allclose,
                                         rtol=1e-5, atol=1e-6)

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    wk_hat = f / T.reduce_sum(f, axis=[0], keepdims=True)
    cost = iwae_estimator(log_f, axis=[0])
    assert_allclose_(-cost,
                     iwae_estimator(log_f, axis=[0], negative=True))
    assert_allclose_(T.reduce_mean(cost),
                     iwae_estimator(log_f, axis=[0], reduction='mean'))
    assert_allclose_(T.reduce_sum(cost),
                     iwae_estimator(log_f, axis=[0], reduction='sum'))
    cost_shape = T.shape(cost)
    assert_allclose_(
        T.grad([T.reduce_sum(cost)], [y])[0],
        T.reduce_sum(wk_hat * (2 * x * y), axis=[0]))

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    wk_hat = f / T.reduce_sum(f, axis=[0], keepdims=True)
    cost_k = iwae_estimator(log_f, axis=[0], keepdims=True)
    assert_allclose_(
        T.reduce_mean(cost_k),
        iwae_estimator(log_f, axis=[0], keepdims=True, reduction='mean'))
    assert_allclose_(
        T.reduce_sum(cost_k),
        iwae_estimator(log_f, axis=[0], keepdims=True, reduction='sum'))
    assert_allclose_(
        -cost_k,
        T.to_numpy(
            iwae_estimator(log_f, axis=[0], keepdims=True, negative=True)))
    self.assertListEqual([1] + cost_shape, T.shape(cost_k))
    assert_allclose_(
        T.grad([T.reduce_sum(cost_k)], [y])[0],
        T.reduce_sum(wk_hat * (2 * x * y), axis=[0]))
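For context, the IWAE objective exercised here is log((1/K) Σ_k f_k), computed as a numerically stable log-mean-exp over the sampling axis; its gradient self-normalizes the importance weights as wk_hat = f_k / Σ_j f_j, which is why `wk_hat` appears in the expected gradients above. A hedged numpy sketch of the estimator's forward value, assuming the particle axis comes first:

import numpy as np

def log_mean_exp(log_f, axis=0):
    # log((1/K) * sum(exp(log_f))) with the max subtracted for stability
    m = np.max(log_f, axis=axis, keepdims=True)
    out = m + np.log(np.mean(np.exp(log_f - m), axis=axis, keepdims=True))
    return np.squeeze(out, axis=axis)

log_f = np.random.randn(7, 3)  # 7 particles, batch of 3
print(log_mean_exp(log_f, axis=0).shape)  # (3,)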
def ensure_stacks_causality(ctx, outputs, size: List[int], pos: List[int]):
    ctx.assertEqual(len(outputs), len(size))
    spatial_ndims = len(outputs)
    for i in range(spatial_ndims):
        output = outputs[i]
        if isinstance(output, T.Tensor):
            output = T.to_numpy(output)
        output = output.reshape(size)

        # advance `pos` by one step along the i-th spatial axis, with carry
        this_pos = list(pos)
        this_pos[i] += 1
        k = i
        while k > 0 and this_pos[k] >= size[k]:
            this_pos[k - 1] += 1
            this_pos[k] = 0
            k -= 1
        for j in range(i + 1, spatial_ndims):
            this_pos[j] = 0

        if this_pos[0] >= size[0]:
            mask = np.zeros(size, dtype=np.float32)
        else:
            mask = make_causal_test_input(size, this_pos,
                                          single_point=False)

        is_wrong = np.any(
            np.logical_and(
                np.abs(output) > 1e-6,
                np.logical_not(mask.astype(np.bool_))))
        ctx.assertFalse(
            is_wrong,
            msg=f'stack.id={i}, pos={pos}, output={output}, mask={mask}')
def test_randint(self):
    for low, high in [(0, 5), (-3, 4)]:
        for dtype, device in product(number_dtypes, [None, T.CPU_DEVICE]):
            # test sample dtype and shape
            t = T.random.randint(low=low, high=high,
                                 shape=[n_samples, 2, 3, 4],
                                 dtype=dtype, device=device)
            self.assertEqual(T.get_dtype(t), dtype)
            self.assertEqual(T.get_device(t),
                             device or T.current_device())
            self.assertEqual(T.shape(t), [n_samples, 2, 3, 4])

            # test sample value range
            x = T.to_numpy(t).astype(np.int32)
            r = list(range(low, high))
            self.assertTrue(
                all((int(v) in r) for v in set(x.reshape([-1]).tolist())))

            # test the prob of each value
            p = 1. / len(r)
            size = 1. * np.size(x)
            for i in r:
                self.assertLessEqual(
                    abs(np.sum(x == i) / size - p),
                    5. * np.sqrt(p * (1. - p)) / np.sqrt(size))

    with pytest.raises(Exception, match='`low` < `high` does not hold'):
        _ = T.random.randint(low=2, high=1, shape=[2, 3, 4])
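The per-value frequency check follows the usual normal approximation: an empirical proportion over `size` iid draws has standard error sqrt(p(1-p)/size), so deviations beyond five standard errors are treated as failures. A standalone sketch of that bound (plain numpy, under the stated assumption):

import numpy as np

rng = np.random.default_rng(42)
low, high = 0, 5
x = rng.integers(low, high, size=100000)
p = 1. / (high - low)
se = np.sqrt(p * (1. - p) / x.size)  # standard error of a proportion
for i in range(low, high):
    assert abs(np.mean(x == i) - p) <= 5. * se  # 5-sigma tolerance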
def f(t):
    if T.sparse.is_sparse_tensor(t):
        t = T.sparse.to_numpy(t)
    if isinstance(t, (T.Tensor, StochasticTensor)):
        t = T.to_numpy(T.as_tensor(t))
    if isinstance(t, sp.spmatrix):
        t = t.toarray()
    return t
def test_kaming_initializer(self):
    for dtype, initializer, mode in product(
            float_dtypes,
            (tk.init.kaming_normal, tk.init.kaming_uniform),
            (None, 'fan_in', 'fan_out'),
    ):
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))
        mode_arg = {'mode': mode} if mode is not None else {}

        # kaming
        fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
        if mode == 'fan_out':
            kaming_std = np.sqrt(1.0 / np.sqrt(fan_out))
        else:
            kaming_std = np.sqrt(1.0 / np.sqrt(fan_in))
        tk.init.apply_initializer(weight, initializer, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / kaming_std / np.sqrt(n_samples))

        # kaming with custom gain and fan_in/fan_out
        fan_in, fan_out = 23, 17
        init_gain = 1.5
        if mode == 'fan_out':
            kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_out))
        else:
            kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_in))
        tk.init.apply_initializer(weight, initializer,
                                  fan_in_and_fan_out=(fan_in, fan_out),
                                  gain=init_gain, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / kaming_std / np.sqrt(n_samples))

    # test error
    with pytest.raises(
            ValueError,
            match='`mode` must be either "fan_in" or "fan_out"'):
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        tk.init.apply_initializer(weight, initializer, mode='invalid')
def test_normal(self):
    for dtype in float_dtypes:
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))

        # normal with default args
        tk.init.apply_initializer(weight, tk.init.normal)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / np.sqrt(n_samples))

        # normal with customized args
        tk.init.apply_initializer(weight,
                                  partial(tk.init.normal, mean=1., std=3.))
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - 1.),
            5.0 * 3. / np.sqrt(n_samples))
def test_uniform(self):
    for dtype in float_dtypes:
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))

        # uniform with default args
        tk.init.apply_initializer(weight, tk.init.uniform)
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - 0.5),
            5.0 / np.sqrt(12.) / np.sqrt(n_samples))

        # uniform with customized args
        tk.init.apply_initializer(
            weight, partial(tk.init.uniform, low=-4., high=3.))
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - (-0.5)),
            5.0 * 7.0 / np.sqrt(12.) / np.sqrt(n_samples))
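The tolerances above come from U(low, high) having mean (low + high)/2 and variance (high - low)^2 / 12, so the sample mean of n draws has standard deviation (high - low) / (sqrt(12) * sqrt(n)); for low=-4, high=3 that explains both the expected mean -0.5 and the `7.0 / sqrt(12)` factor. A quick standalone verification in numpy:

import numpy as np

rng = np.random.default_rng(7)
low, high = -4., 3.
x = rng.uniform(low, high, size=1000000)
print(np.mean(x))  # ~= (low + high) / 2 == -0.5
print(np.var(x), (high - low) ** 2 / 12.)  # both ~= 49 / 12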
def ensure_full_receptive_field(ctx, output, size: List[int],
                                pos: List[int]):
    if isinstance(output, T.Tensor):
        output = T.to_numpy(output)
    output_true = (np.abs(output.reshape(size)) >= 1e-6).astype(np.int32)
    mask = make_causal_mask(size, pos).astype(np.int32)
    ctx.assertTrue(
        np.all(
            np.logical_not(
                np.logical_xor(mask.astype(np.bool_),
                               output_true.astype(np.bool_)))),
        msg=f'pos={pos}, output_true={output_true}, mask={mask}')
def test_random_init(self):
    for dtype in float_dtypes:
        t = T.variable([n_samples, 2, 3], dtype=dtype)
        for fn, mean, std in [
                    (partial(T.random.normal_init, mean=1., std=2.),
                     1., 2.),
                    (partial(T.random.uniform_init, low=0., high=1.),
                     0.5, 1. / math.sqrt(12)),
                ]:
            fn(t)
            t_mean = np.mean(T.to_numpy(t))
            self.assertLess(abs(t_mean - mean),
                            3. * std / math.sqrt(n_samples * 2 * 3))
def test_StatsRecorder(self):
    for shape in ([], [4]):
        metrics = mltk.GeneralMetricCollector(shape)
        arrays = [T.random.randn([2, 4]), T.random.randn([3, 4])]
        for arr in arrays:
            metrics.update(T.to_numpy(arr))
        standard_recorder_check(
            self,
            StatsRecorder(shape),
            arrays,
            metrics.stats,
        )
def f(g):
    a = T.variable([], initializer=123.)
    b = T.variable([], initializer=456.)
    c = T.variable([], initializer=789.)
    T.random.seed(1234)

    optimizer = optimizer_factory([a, b, c], lr)
    with optimizer.capture_grad():
        optimizer.add_loss((a + b) ** 2)
    g(optimizer)
    optimizer.step()
    return [T.to_numpy(t) for t in (a, b, c, (a + b) ** 2)]
def plot_samples(epoch=None):
    epoch = epoch or loop.epoch
    with tk.layers.scoped_eval_mode(vae), T.no_grad():
        logits = vae.p(n_z=100)['x'].distribution.logits
        images = T.reshape(
            T.cast(T.clip(T.nn.sigmoid(logits) * 255., 0., 255.),
                   dtype=T.uint8),
            [-1, 28, 28],
        )
    utils.save_images_collection(
        images=T.to_numpy(images),
        filename=exp.abspath(f'plotting/{epoch}.png'),
        grid_size=(10, 10),
    )
def test_rand(self):
    for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]):
        # test sample dtype and shape
        t = T.random.rand([n_samples, 2, 3, 4], dtype=dtype, device=device)
        self.assertEqual(T.get_dtype(t), dtype)
        self.assertEqual(T.get_device(t), device or T.current_device())
        self.assertEqual(T.shape(t), [n_samples, 2, 3, 4])

        # test sample mean
        x = T.to_numpy(t)
        x_mean = np.mean(x, axis=0)
        np.testing.assert_array_less(
            np.abs(0.5 - x_mean),
            (3. * np.sqrt(1. / 12) / np.sqrt(n_samples) *
             np.ones_like(x_mean)))
def test_matmul(self):
    indices = T.as_tensor(np.random.randint(0, 50, size=[2, 200]))
    values = T.random.randn([200])
    shape = [60, 50]
    y = T.random.randn([50, 30])

    for force_coalesced in [False, True]:
        x = T.sparse.make_sparse(indices, values, shape=shape,
                                 force_coalesced=force_coalesced)
        assert_allclose(
            T.sparse.matmul(x, y),
            np.dot(T.sparse.to_numpy(x), T.to_numpy(y)),
            rtol=1e-4, atol=1e-6,
        )
def do_test_sample(n_z, sample_shape, float_dtype, dtype):
    probs_t = T.as_tensor(probs, dtype=float_dtype)
    logits_t = T.as_tensor(logits, dtype=float_dtype)
    t = T.random.bernoulli(probs=probs_t, n_samples=n_z, dtype=dtype)
    self.assertEqual(T.get_dtype(t), dtype)
    self.assertEqual(T.get_device(t), T.current_device())
    self.assertEqual(T.shape(t), sample_shape + [2, 3, 4])

    # all values must be either 0 or 1
    x = T.to_numpy(t)
    self.assertEqual(set(x.flatten().tolist()), {0, 1})

    # check the log prob
    do_check_log_prob(
        given=t,
        batch_ndims=len(t.shape),
        Z_log_prob_fn=partial(T.random.bernoulli_log_prob,
                              logits=logits_t),
        np_log_prob=log_prob(x))
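The Bernoulli log-probability checked here can be written directly from the logit l: log p(y) = y * log σ(l) + (1 - y) * log σ(-l), i.e. -softplus(-l) for y=1 and -softplus(l) for y=0. A hedged numpy sketch of that identity (standalone, not the library's implementation):

import numpy as np

def bernoulli_log_prob(y, logits):
    # y * log(sigmoid(l)) + (1 - y) * log(sigmoid(-l)),
    # expressed via a numerically stable softplus
    softplus = lambda v: np.log1p(np.exp(-np.abs(v))) + np.maximum(v, 0.)
    return -np.where(y > 0, softplus(-logits), softplus(logits))

y = np.array([0., 1., 1., 0.])
logits = np.array([-2., -2., 3., 3.])
p = 1. / (1. + np.exp(-logits))
expected = y * np.log(p) + (1. - y) * np.log(1. - p)
print(np.allclose(bernoulli_log_prob(y, logits), expected))  # True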
def test_randn(self):
    for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]):
        # test sample dtype and shape
        t = T.random.randn([n_samples, 2, 3, 4], dtype=dtype,
                           device=device)
        self.assertEqual(T.get_dtype(t), dtype)
        self.assertEqual(T.get_device(t), device or T.current_device())
        self.assertEqual(T.shape(t), [n_samples, 2, 3, 4])

        # test sample mean
        x = T.to_numpy(t)
        x_mean = np.mean(x, axis=0)
        np.testing.assert_array_less(
            np.abs(x_mean),
            3. / np.sqrt(n_samples) * np.ones_like(x_mean))

        # test log_prob
        do_check_log_prob(
            given=t,
            batch_ndims=len(x.shape),
            Z_log_prob_fn=T.random.randn_log_pdf,
            np_log_prob=np.log(np.exp(-x ** 2 / 2.) / np.sqrt(2 * np.pi)))
def test_uniform(self):
    for low, high in [(-1., 2.), (3.5, 7.5)]:
        for dtype, device in product(float_dtypes, [None, T.CPU_DEVICE]):
            # test sample dtype and shape
            t = T.random.uniform([n_samples, 2, 3, 4], low=low, high=high,
                                 dtype=dtype, device=device)
            self.assertEqual(T.get_dtype(t), dtype)
            self.assertEqual(T.get_device(t),
                             device or T.current_device())
            self.assertEqual(T.shape(t), [n_samples, 2, 3, 4])

            # test sample mean
            x = T.to_numpy(t)
            x_mean = np.mean(x, axis=0)
            np.testing.assert_array_less(
                np.abs(x_mean - 0.5 * (high + low)),
                (5. * np.sqrt((high - low) ** 2 / 12) /
                 np.sqrt(n_samples) * np.ones_like(x_mean)))

    with pytest.raises(Exception, match='`low` < `high` does not hold'):
        _ = T.random.uniform([2, 3, 4], low=2., high=1.)
def get_samples(mean, log_scale, n_samples=None, **kwargs):
    seed = next_seed()
    kwargs.setdefault('epsilon', 1e-7)
    sample_shape = T.get_broadcast_shape(T.shape(mean),
                                         T.shape(log_scale))
    if n_samples is not None:
        sample_shape = [n_samples] + sample_shape

    np.random.seed(seed)
    T.random.seed(seed)
    u = T.random.uniform(shape=sample_shape, low=kwargs['epsilon'],
                         high=1. - kwargs['epsilon'],
                         dtype=T.get_dtype(mean))
    u = T.to_numpy(u)

    np.random.seed(seed)
    T.random.seed(seed)
    r = T.random.discretized_logistic(mean, log_scale,
                                      n_samples=n_samples, **kwargs)
    return u, r
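`get_samples` re-seeds both RNGs so that the uniform noise `u` it returns is the same noise the backend consumes, letting the caller reproduce the draw exactly. The underlying continuous draw is inverse-CDF sampling of a logistic: x = mean + scale * (log u - log(1 - u)) for u ~ U(epsilon, 1 - epsilon). A standalone numpy sketch of that transform (discretization and clipping omitted):

import numpy as np

rng = np.random.default_rng(1)
mean, log_scale = 0.5, -1.0
eps = 1e-7
u = rng.uniform(eps, 1. - eps, size=100000)
x = mean + np.exp(log_scale) * (np.log(u) - np.log1p(-u))  # logistic draw
# logistic(mean, scale): mean == location, variance == (pi * scale)^2 / 3
print(np.mean(x), np.var(x), (np.pi * np.exp(log_scale)) ** 2 / 3.)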
def test_categorical(self):
    def log_softmax(x, axis):
        x_max = np.max(x, axis=axis, keepdims=True)
        x_diff = x - x_max
        return x_diff - np.log(
            np.sum(np.exp(x_diff), axis=axis, keepdims=True))

    def softmax(x, axis):
        return np.exp(log_softmax(x, axis))

    def one_hot(x: np.ndarray, n_classes: int):
        I = np.eye(n_classes, dtype=x.dtype)
        return I[x.astype(np.int32)]

    def log_prob(given, probs, n_classes: int, is_one_hot: bool = False):
        if not is_one_hot:
            given = one_hot(given, n_classes)
        # return np.log(np.prod(element_pow(probs, one-hot-given), axis=-1))
        return np.sum(given * np.log(probs), axis=-1)

    n_classes = 5
    logits = np.clip(np.random.randn(2, 3, 4, n_classes) / 10.,
                     a_min=-0.3, a_max=0.3)
    probs = softmax(logits, axis=-1)

    # categorical_logits_to_probs and categorical_probs_to_logits
    for dtype in float_dtypes:
        logits_t = T.as_tensor(logits, dtype=dtype)
        probs_t = T.as_tensor(probs, dtype=dtype)
        assert_allclose(T.random.categorical_logits_to_probs(logits_t),
                        probs, rtol=1e-4)
        assert_allclose(T.random.categorical_probs_to_logits(probs_t),
                        np.log(probs), rtol=1e-4)

    T.assert_finite(
        T.random.categorical_probs_to_logits(
            T.as_tensor(np.array([0., 1.]))),
        'logits')

    # sample
    def do_test_sample(is_one_hot: bool,
                       n_z: Optional[int],
                       dtype: Optional[str],
                       float_dtype: str):
        probs_t = T.as_tensor(probs, dtype=float_dtype)
        logits_t = T.as_tensor(logits, dtype=float_dtype)
        value_shape = [n_classes] if is_one_hot else []

        if dtype is not None:
            expected_dtype = dtype
        else:
            expected_dtype = T.int32 if is_one_hot else T.categorical_dtype

        # sample
        sample_shape = [n_z] if n_z is not None else []
        kwargs = {'dtype': dtype} if dtype else {}
        t = (T.random.one_hot_categorical if is_one_hot
             else T.random.categorical)(probs_t, n_samples=n_z, **kwargs)
        self.assertEqual(T.get_dtype(t), expected_dtype)
        self.assertEqual(T.get_device(t), T.current_device())
        self.assertEqual(T.shape(t),
                         sample_shape + [2, 3, 4] + value_shape)

        # check values
        x = T.to_numpy(t)
        if is_one_hot:
            self.assertEqual(set(x.flatten().tolist()), {0, 1})
        else:
            if n_z is None:
                self.assertTrue(
                    set(x.flatten().tolist()).issubset(
                        set(range(n_classes))))
            else:
                self.assertEqual(set(x.flatten().tolist()),
                                 set(range(n_classes)))

        # check log_prob
        do_check_log_prob(
            given=t,
            batch_ndims=len(t.shape) - int(is_one_hot),
            Z_log_prob_fn=partial(
                (T.random.one_hot_categorical_log_prob
                 if is_one_hot else T.random.categorical_log_prob),
                logits=logits_t),
            np_log_prob=log_prob(x, probs, n_classes, is_one_hot))

    # overall test on various arguments
    for is_one_hot in (True, False):
        for n_z in (None, 100):
            do_test_sample(is_one_hot, n_z, None, T.float32)
    for dtype in (None,) + number_dtypes:
        for float_dtype in float_dtypes:
            do_test_sample(True, 100, dtype, float_dtype)

    # sample with 2d probs
    for Z_sample_fn in (T.random.categorical,
                        T.random.one_hot_categorical):
        is_one_hot = Z_sample_fn == T.random.one_hot_categorical
        this_probs = probs[0, 0]
        t = Z_sample_fn(probs=T.as_tensor(this_probs), n_samples=100)
        self.assertEqual(
            T.shape(t),
            [100, 4] + ([n_classes] if is_one_hot else []))

        x = T.to_numpy(t)
        logits_t = T.as_tensor(np.log(this_probs))
        do_check_log_prob(
            given=t,
            batch_ndims=len(t.shape) - int(is_one_hot),
            Z_log_prob_fn=partial(
                (T.random.one_hot_categorical_log_prob
                 if is_one_hot else T.random.categorical_log_prob),
                logits=logits_t),
            np_log_prob=log_prob(x, this_probs, n_classes, is_one_hot))

    # given has lower rank than params, broadcasted to match param
    for is_one_hot in (False, True):
        logits_t = T.as_tensor(logits, dtype=T.float32)
        for val in range(n_classes):
            given = (one_hot(np.asarray(val), n_classes)
                     if is_one_hot else np.asarray(val))
            given_t = T.as_tensor(given)
            Z_log_prob_fn = (T.random.one_hot_categorical_log_prob
                             if is_one_hot
                             else T.random.categorical_log_prob)
            assert_allclose(
                Z_log_prob_fn(given_t, logits_t),
                log_prob(given, probs, n_classes, is_one_hot),
                rtol=1e-4)

    # argument error
    for Z_sample_fn in (T.random.categorical,
                        T.random.one_hot_categorical):
        with pytest.raises(Exception,
                           match='`n_samples` must be at least 1'):
            _ = Z_sample_fn(probs=T.as_tensor(probs), n_samples=0)

        with pytest.raises(Exception,
                           match='The rank of `probs` must be at '
                                 'least 1'):
            _ = Z_sample_fn(probs=T.as_tensor(probs[0, 0, 0, 0]))
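The reference `log_softmax` above subtracts the per-row maximum before exponentiating; softmax is shift-invariant, so the result is unchanged, but `exp()` can no longer overflow. A standalone demonstration of why the shift matters:

import numpy as np

def log_softmax(x, axis=-1):
    # shift-invariant: subtracting max(x) changes nothing mathematically,
    # but keeps exp() from overflowing
    x = x - np.max(x, axis=axis, keepdims=True)
    return x - np.log(np.sum(np.exp(x), axis=axis, keepdims=True))

logits = np.array([1000., 1001., 1002.])  # naive exp() would overflow
print(np.exp(log_softmax(logits)))  # well-defined probabilities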
def test_causality_and_receptive_field(self):
    for size in [[12], [12, 11], [12, 11, 10]]:
        spatial_ndims = len(size)
        for kernel_size in [3, 5, [5, 3, 5][:spatial_ndims]]:
            # ---- construct the layers ----
            # the input layer
            input_layer_cls = getattr(
                tk.layers, f'PixelCNNInput{spatial_ndims}d')
            input_layer = input_layer_cls(
                1, 1, kernel_size=kernel_size, edge_bias=False,
                weight_init=tk.init.ones,
            )
            input_layer = tk.layers.jit_compile(input_layer)

            with pytest.raises(Exception,
                               match='`input` is expected to be .*d'):
                _ = input_layer(T.zeros([1] * (spatial_ndims + 1)))
            with pytest.raises(Exception,
                               match='`input` is expected to be .*d'):
                _ = input_layer(T.zeros([1] * (spatial_ndims + 3)))

            # `add_ones_channel = True`
            input_layer2 = input_layer_cls(1, 1, kernel_size=kernel_size,
                                           weight_init=tk.init.ones)

            # the pixelcnn resblock
            resblock_layer_cls = getattr(
                tk.layers, f'PixelCNNResBlock{spatial_ndims}d')
            with pytest.raises(ValueError,
                               match=r'`kernel_size` is required to be at '
                                     r'least 3'):
                _ = resblock_layer_cls(1, 1, kernel_size=1)
            with pytest.raises(
                    ValueError,
                    match=r'`kernel_size` is required to be odd'):
                _ = resblock_layer_cls(
                    1, 1, kernel_size=[4, 3, 5][:spatial_ndims])

            resblock_layer = resblock_layer_cls(1, 1,
                                                kernel_size=kernel_size,
                                                weight_init=tk.init.ones)
            resblock_layer = tk.layers.jit_compile(resblock_layer)

            with pytest.raises(Exception):
                _ = resblock_layer([T.zeros([])] * (spatial_ndims - 1))
            with pytest.raises(Exception):
                _ = resblock_layer([T.zeros([])] * (spatial_ndims + 1))

            # the down-sampling and up-sampling layer
            down_sample_cls = getattr(
                tk.layers, f'PixelCNNConv{spatial_ndims}d')
            down_sample_layer = down_sample_cls(1, 1, kernel_size,
                                                stride=2)
            down_sample_layer = tk.layers.jit_compile(down_sample_layer)
            down_sample_output_size = T.shape(
                down_sample_layer(
                    [T.zeros(make_conv_shape([1], 1, size))] *
                    spatial_ndims)[0])

            up_sample_cls = getattr(
                tk.layers, f'PixelCNNConvTranspose{spatial_ndims}d')
            up_sample_layer = up_sample_cls(
                1, 1, kernel_size, stride=2,
                output_padding=tk.layers.get_deconv_output_padding(
                    input_size=[
                        down_sample_output_size[a]
                        for a in get_spatial_axis(spatial_ndims)
                    ],
                    output_size=size,
                    kernel_size=kernel_size,
                    stride=2,
                    # sum of both sides == (kernel_size - 1) * dilation
                    padding='half',
                ))
            up_sample_layer = tk.layers.jit_compile(up_sample_layer)

            # the output layer
            output_layer_cls = getattr(
                tk.layers, f'PixelCNNOutput{spatial_ndims}d')
            output_layer = output_layer_cls()
            output_layer = tk.layers.jit_compile(output_layer)

            with pytest.raises(
                    Exception,
                    match=r'`len\(inputs\)` is expected to be .*'):
                _ = output_layer([T.zeros([])] * (spatial_ndims - 1))
            with pytest.raises(
                    Exception,
                    match=r'`len\(inputs\)` is expected to be .*'):
                _ = output_layer([T.zeros([])] * (spatial_ndims + 1))

            # ---- test the causality ----
            for pos, single_point in product(iter_causal_test_pos(size),
                                             (True, False)):
                x = make_causal_test_input(
                    size, pos, single_point=single_point)
                x_t = T.as_tensor(x)

                # check the input layer output
                outputs = input_layer(x_t)
                ensure_stacks_causality(self, outputs, size, pos)

                # check the final output
                assert_allclose(output_layer(outputs), outputs[-1])

                # check the resblock output
                resblock_outputs = resblock_layer(outputs)
                ensure_stacks_causality(self, resblock_outputs, size, pos)

                outputs2 = resblock_outputs
                for i in range(4):
                    outputs2 = resblock_layer(outputs2)
                ensure_full_receptive_field(self, outputs2[-1], size, pos)

                # check the down-sample and up-sample
                down_sample_outputs = down_sample_layer(outputs)
                up_sample_outputs = up_sample_layer(down_sample_outputs)
                ensure_stacks_causality(self, up_sample_outputs, size, pos)

            # ---- test zero input on different input layers ----
            x_t = T.zeros(make_conv_shape([1], 1, size), dtype=T.float32)

            outputs = input_layer(x_t)
            assert_equal(
                (np.abs(T.to_numpy(outputs[-1])) >= 1e-6).astype(np.int32),
                x_t)

            outputs = input_layer2(x_t)
            assert_equal(
                (np.abs(T.to_numpy(outputs[-1])) >= 1e-6).astype(np.int32),
                make_causal_mask(size,
                                 [0] * spatial_ndims).astype(np.int32))
def test_sample_and_log_prob(self):
    array_low = np.random.randn(2, 1)
    array_high = np.exp(np.random.randn(1, 3)) + 1.
    log_zero = -1e6

    def log_prob(x, low, high, group_ndims=0):
        if low is None and high is None:
            low, high = 0., 1.
        log_pdf = -np.log(np.ones_like(x) * (high - low))
        log_pdf = np.where(np.logical_and(low <= x, x <= high),
                           log_pdf, log_zero)
        log_pdf = np.sum(log_pdf, axis=tuple(range(-group_ndims, 0)))
        return log_pdf

    for shape, dtype, (low, high), event_ndims in product(
            [None, [], [5, 4]],
            float_dtypes,
            [(None, None), (-1., 2.), (array_low, array_high)],
            range(5)):
        low_rank = len(np.shape(low)) if low is not None else 0
        if event_ndims > len(shape or []) + low_rank:
            continue

        if isinstance(low, np.ndarray):
            low_t = T.as_tensor(low, dtype=dtype)
            high_t = T.as_tensor(high, dtype=dtype)
            uniform = Uniform(shape=shape, low=low_t, high=high_t,
                              event_ndims=event_ndims, log_zero=log_zero)
            value_shape = (shape or []) + [2, 3]
            self.assertIs(uniform.low, low_t)
            self.assertIs(uniform.high, high_t)
        else:
            uniform = Uniform(shape=shape, low=low, high=high,
                              dtype=dtype, event_ndims=event_ndims,
                              log_zero=log_zero)
            value_shape = shape or []
            self.assertEqual(uniform.low, low)
            self.assertEqual(uniform.high, high)

        self.assertEqual(uniform.log_zero, log_zero)
        self.assertEqual(uniform.value_shape, value_shape)

        # sample(n_samples=None)
        t = uniform.sample()
        x = T.to_numpy(t.tensor)
        sample_shape = value_shape
        self.assertIsInstance(t, StochasticTensor)
        self.assertIs(t.distribution, uniform)
        self.assertEqual(T.get_dtype(t.tensor), dtype)
        self.assertEqual(t.n_samples, None)
        self.assertEqual(t.group_ndims, 0)
        self.assertEqual(t.reparameterized, True)
        self.assertIsInstance(t.tensor, T.Tensor)
        self.assertEqual(T.shape(t.tensor), sample_shape)

        for log_pdf in [t.log_prob(), uniform.log_prob(t)]:
            self.assertEqual(T.get_dtype(log_pdf), dtype)
            assert_allclose(log_pdf,
                            log_prob(x, low, high, event_ndims),
                            rtol=1e-4)

        # test log-prob on out-of-range values
        assert_allclose(
            uniform.log_prob(t.tensor * 10.),
            log_prob(x * 10., low, high, event_ndims),
            rtol=1e-4,
        )

        # sample(n_samples=7)
        if event_ndims >= 1:
            t = uniform.sample(n_samples=7, group_ndims=-1,
                               reparameterized=False)
            x = T.to_numpy(t.tensor)
            sample_shape = [7] + value_shape
            self.assertIsInstance(t, StochasticTensor)
            self.assertIs(t.distribution, uniform)
            self.assertEqual(T.get_dtype(t.tensor), dtype)
            self.assertEqual(t.n_samples, 7)
            self.assertEqual(t.group_ndims, -1)
            self.assertEqual(t.reparameterized, False)
            self.assertIsInstance(t.tensor, T.Tensor)
            self.assertEqual(T.shape(t.tensor), sample_shape)

            reduce_ndims = event_ndims - 1
            for log_pdf in [t.log_prob(),
                            uniform.log_prob(t, group_ndims=-1)]:
                self.assertEqual(T.get_dtype(log_pdf), dtype)
                assert_allclose(log_pdf,
                                log_prob(x, low, high, reduce_ndims),
                                rtol=1e-4)

    # test reparameterized
    low_t = T.requires_grad(T.as_tensor(array_low))
    high_t = T.requires_grad(T.as_tensor(array_high))
    uniform = Uniform(low=low_t, high=high_t)

    t = uniform.sample()
    self.assertTrue(t.reparameterized)
    u = (T.to_numpy(t.tensor) - array_low) / (array_high - array_low)
    [low_grad, high_grad] = T.grad([T.reduce_sum(t.tensor)],
                                   [low_t, high_t])
    assert_allclose(low_grad,
                    np.sum(1. - u, axis=-1, keepdims=True),
                    rtol=1e-4)
    assert_allclose(high_grad,
                    np.sum(u, axis=0, keepdims=True),
                    rtol=1e-4)

    t = uniform.sample(reparameterized=False)
    w_t = T.requires_grad(T.as_tensor(np.random.randn(2, 3)))
    self.assertFalse(t.reparameterized)
    [low_grad, high_grad] = T.grad([T.reduce_sum(w_t * t.tensor)],
                                   [low_t, high_t],
                                   allow_unused=True)
    self.assertTrue(T.is_null_grad(low_t, low_grad))
    self.assertTrue(T.is_null_grad(high_t, high_grad))
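The expected gradients at the end of this test follow from the reparameterization x = low + u * (high - low) with u ~ U(0, 1): dx/dlow = 1 - u and dx/dhigh = u, and broadcasting means each parameter accumulates the sum over the axes it was broadcast across. A hedged standalone illustration of the pathwise derivative, checked against finite differences:

import numpy as np

low, high, u = -1.0, 2.0, 0.25
x = low + u * (high - low)  # reparameterized uniform sample

# pathwise derivatives, holding the noise u fixed
dx_dlow = 1. - u   # moving `low` shifts x by the unused fraction of u
dx_dhigh = u       # moving `high` shifts x proportionally to u

eps = 1e-6
print(((low + eps + u * (high - low - eps)) - x) / eps, dx_dlow)   # ~0.75
print(((low + u * (high + eps - low)) - x) / eps, dx_dhigh)        # ~0.25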
def test_construct(self):
    for dtype in float_dtypes:
        # specify no args
        uniform = Uniform(dtype=dtype, event_ndims=0)
        self.assertEqual(uniform.value_shape, [])
        self.assertEqual(uniform.low, None)
        self.assertEqual(uniform.high, None)
        self.assertEqual(uniform.event_ndims, 0)
        self.assertEqual(uniform.log_zero, -1e7)

        # specify `low` and `high` float
        uniform = Uniform(low=-1., high=2., dtype=dtype, event_ndims=0)
        self.assertEqual(uniform.value_shape, [])
        self.assertEqual(uniform.low, -1.)
        self.assertEqual(uniform.high, 2.)
        self.assertEqual(uniform.event_ndims, 0)

        # specify `low`, `high` tensors
        low_t = T.full([2, 1], -1., dtype=dtype)
        high_t = T.full([1, 3], 2., dtype=dtype)
        uniform = Uniform(low=low_t, high=high_t, dtype=dtype,
                          event_ndims=2)
        self.assertEqual(uniform.value_shape, [2, 3])
        self.assertEqual(uniform.dtype, dtype)
        self.assertEqual(uniform.event_ndims, 2)
        self.assertIs(uniform.low, low_t)
        self.assertIs(uniform.high, high_t)

        # specify `low` or `high`, one as tensor and one as numpy array
        for low, high in [(low_t, T.to_numpy(high_t)),
                          (T.to_numpy(low_t), high_t)]:
            uniform = Uniform(
                low=low,
                high=high,
                dtype=T.float32,  # should be ignored
                event_ndims=2)
            self.assertEqual(uniform.value_shape, [2, 3])
            self.assertEqual(uniform.dtype, dtype)
            self.assertEqual(T.get_dtype(uniform.low), dtype)
            self.assertEqual(T.get_dtype(uniform.high), dtype)
            self.assertEqual(uniform.event_ndims, 2)
            assert_equal(uniform.low, low_t)
            assert_equal(uniform.high, high_t)

    for event_ndims, dtype, shape in product(range(0, 3), float_dtypes,
                                             ([], [2, 3])):
        if event_ndims > len(shape):
            continue

        # specify `shape`
        uniform = Uniform(shape=shape, dtype=dtype,
                          event_ndims=event_ndims)
        self.assertEqual(uniform.value_shape, shape)
        self.assertEqual(uniform.dtype, dtype)
        self.assertEqual(uniform.event_ndims, event_ndims)
        self.assertEqual(uniform.low, None)
        self.assertEqual(uniform.high, None)

        # specify `shape` and `low`, `high` floats
        uniform = Uniform(shape=shape, low=-1., high=2., dtype=dtype,
                          event_ndims=event_ndims)
        self.assertEqual(uniform.value_shape, shape)
        self.assertEqual(uniform.dtype, dtype)
        self.assertEqual(uniform.event_ndims, event_ndims)
        self.assertEqual(uniform.low, -1.)
        self.assertEqual(uniform.high, 2.)

        # specify just one of `low`, `high` as float, another as tensor
        uniform = Uniform(shape=shape, low=-1.,
                          high=T.as_tensor(2., dtype=dtype),
                          event_ndims=event_ndims)
        self.assertEqual(uniform.value_shape, shape)
        self.assertEqual(uniform.dtype, dtype)
        self.assertEqual(uniform.event_ndims, event_ndims)
        self.assertEqual(uniform.low, -1.)
        self.assertEqual(uniform.high, 2.)

    for event_ndims, dtype, shape in product(range(0, 3), float_dtypes,
                                             ([], [2, 3])):
        if event_ndims > len(shape) + 2:
            continue

        # specify `shape` and `low`, `high` tensors
        low_t = T.full([2, 1], -1., dtype=dtype)
        high_t = T.full([1, 3], 2., dtype=dtype)
        uniform = Uniform(
            shape=shape,
            low=low_t,
            high=high_t,
            dtype=T.float32,  # should be ignored
            event_ndims=event_ndims)
        self.assertEqual(uniform.value_shape, shape + [2, 3])
        self.assertEqual(uniform.dtype, dtype)
        self.assertEqual(uniform.event_ndims, event_ndims)
        self.assertIs(uniform.low, low_t)
        self.assertIs(uniform.high, high_t)

    with pytest.raises(ValueError,
                       match='`low` and `high` must be both specified, or '
                             'neither specified'):
        _ = Uniform(low=-1.)
    with pytest.raises(
            ValueError,
            match='`high.dtype` != `low.dtype`: float64 vs float32'):
        _ = Uniform(low=T.full([2, 3], -1., dtype=T.float32),
                    high=T.full([2, 3], 2., dtype=T.float64))

    with pytest.raises(ValueError,
                       match='`low` < `high` does not hold: `low` == 2.0, '
                             '`high` == 1.0'):
        _ = Uniform(low=2., high=1.)

    with pytest.raises(Exception, match='`low` < `high` does not hold'):
        _ = Uniform(low=T.full([2, 3], 2., dtype=T.float32),
                    high=T.full([2, 3], -1., dtype=T.float32),
                    validate_tensors=True)
def do_test_sample(bin_size: float,
                   min_val: Optional[float],
                   max_val: Optional[float],
                   discretize_sample: bool,
                   discretize_given: bool,
                   biased_edges: bool,
                   reparameterized: bool,
                   n_samples: Optional[int],
                   validate_tensors: bool,
                   dtype: str):
    mean_t = T.as_tensor(mean, dtype=dtype)
    log_scale_t = T.as_tensor(log_scale, dtype=dtype)
    value_shape = T.get_broadcast_shape(T.shape(mean_t),
                                        T.shape(log_scale_t))

    # sample
    sample_shape = [n_samples] if n_samples is not None else []
    u, t = get_samples(
        mean_t, log_scale_t,
        n_samples=n_samples,
        bin_size=bin_size,
        min_val=min_val,
        max_val=max_val,
        discretize=discretize_sample,
        reparameterized=reparameterized,
        epsilon=T.EPSILON,
        validate_tensors=validate_tensors,
    )
    self.assertEqual(T.get_dtype(t), dtype)
    self.assertEqual(T.get_device(t), T.current_device())
    self.assertEqual(T.shape(t), sample_shape + value_shape)

    # check values
    this_mean = mean.astype(dtype)
    this_log_scale = log_scale.astype(dtype)
    expected_t = naive_discretized_logistic_sample(
        u, this_mean, this_log_scale, bin_size, min_val, max_val,
        discretize_sample=discretize_sample,
    )
    assert_allclose(t, expected_t, rtol=1e-4, atol=1e-6)

    # check log_prob
    do_check_log_prob(
        given=t,
        batch_ndims=len(t.shape),
        Z_log_prob_fn=partial(
            T.random.discretized_logistic_log_prob,
            mean=mean_t, log_scale=log_scale_t, bin_size=bin_size,
            min_val=min_val, max_val=max_val,
            biased_edges=biased_edges,
            discretize=discretize_given,
            validate_tensors=validate_tensors,
        ),
        np_log_prob=naive_discretized_logistic_pdf(
            x=T.to_numpy(t),
            mean=this_mean,
            log_scale=this_log_scale,
            bin_size=bin_size,
            min_val=min_val,
            max_val=max_val,
            biased_edges=biased_edges,
            discretize_given=discretize_given,
        ))