def test_monte_carlo_objective(self):
    log_p, log_q = prepare_test_payload()

    obj = monte_carlo_objective(log_p, log_q, axis=[0])
    assert_allclose(
        T.reduce_mean(obj),
        monte_carlo_objective(log_p, log_q, axis=[0], reduction='mean'),
        rtol=1e-4, atol=1e-6
    )
    assert_allclose(
        T.reduce_sum(obj),
        monte_carlo_objective(log_p, log_q, axis=[0], reduction='sum'),
        rtol=1e-4, atol=1e-6
    )
    obj_shape = T.shape(obj)
    assert_allclose(obj, T.log_mean_exp(log_p - log_q, axis=[0]))

    obj_k = monte_carlo_objective(log_p, log_q, axis=[0], keepdims=True)
    assert_allclose(
        T.reduce_mean(obj_k),
        monte_carlo_objective(log_p, log_q, axis=[0], keepdims=True,
                              reduction='mean')
    )
    assert_allclose(
        T.reduce_sum(obj_k),
        monte_carlo_objective(log_p, log_q, axis=[0], keepdims=True,
                              reduction='sum')
    )
    self.assertListEqual([1] + obj_shape, T.shape(obj_k))
    assert_allclose(
        obj_k,
        T.log_mean_exp(log_p - log_q, axis=[0], keepdims=True)
    )
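# NOTE: a minimal NumPy sketch (not the library implementation) of the
# identity the test above pins down: `monte_carlo_objective` is the
# log-mean-exp of the importance weights, log (1/K) sum_k exp(log_p - log_q),
# reduced over the sampling axis.  Assumes `numpy` is imported as `np`.
def _monte_carlo_objective_ref(log_p, log_q, axis=0, keepdims=False):
    w = log_p - log_q
    m = np.max(w, axis=axis, keepdims=True)  # shift for numerical stability
    out = m + np.log(np.mean(np.exp(w - m), axis=axis, keepdims=True))
    return out if keepdims else np.squeeze(out, axis=axis)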
def test_iwae(self):
    assert_allclose_ = functools.partial(assert_allclose,
                                         rtol=1e-5, atol=1e-6)

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    wk_hat = f / T.reduce_sum(f, axis=[0], keepdims=True)
    cost = iwae_estimator(log_f, axis=[0])
    assert_allclose_(-cost, iwae_estimator(log_f, axis=[0], negative=True))
    assert_allclose_(T.reduce_mean(cost),
                     iwae_estimator(log_f, axis=[0], reduction='mean'))
    assert_allclose_(T.reduce_sum(cost),
                     iwae_estimator(log_f, axis=[0], reduction='sum'))
    cost_shape = T.shape(cost)
    assert_allclose_(
        T.grad([T.reduce_sum(cost)], [y])[0],
        T.reduce_sum(wk_hat * (2 * x * y), axis=[0]))

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    wk_hat = f / T.reduce_sum(f, axis=[0], keepdims=True)
    cost_k = iwae_estimator(log_f, axis=[0], keepdims=True)
    assert_allclose_(
        T.reduce_mean(cost_k),
        iwae_estimator(log_f, axis=[0], keepdims=True, reduction='mean'))
    assert_allclose_(
        T.reduce_sum(cost_k),
        iwae_estimator(log_f, axis=[0], keepdims=True, reduction='sum'))
    assert_allclose_(
        -cost_k,
        T.to_numpy(iwae_estimator(log_f, axis=[0], keepdims=True,
                                  negative=True)))
    self.assertListEqual([1] + cost_shape, T.shape(cost_k))
    assert_allclose_(
        T.grad([T.reduce_sum(cost_k)], [y])[0],
        T.reduce_sum(wk_hat * (2 * x * y), axis=[0]))
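# NOTE: a hedged sketch of the identity exercised by the gradient checks
# above: for L = log (1/K) sum_k exp(log_f_k), the gradient satisfies
# dL/dtheta = sum_k w_hat_k * d log_f_k / dtheta, where
# w_hat_k = f_k / sum_j f_j are the self-normalized importance weights
# (the `wk_hat` computed in the test).  The expected `2 * x * y` factor
# suggests the payload's log_f depends on y as x * y ** 2.
def _normalized_importance_weights(log_f, axis=0):
    m = np.max(log_f, axis=axis, keepdims=True)   # stabilized softmax
    f = np.exp(log_f - m)
    return f / np.sum(f, axis=axis, keepdims=True)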
def train_step(x):
    chain = vae.get_chain(x)

    # loss with KL warm-up: `beta` anneals linearly from 0 to 1 over the
    # first 100 epochs
    beta = min(loop.epoch / 100., 1.)
    log_qz_given_x = T.reduce_mean(chain.q['z'].log_prob())
    log_pz = T.reduce_mean(chain.p['z'].log_prob())
    log_px_given_z = T.reduce_mean(chain.p['x'].log_prob())
    kl = log_pz - log_qz_given_x
    elbo = log_px_given_z + beta * kl

    # add L2 regularization to the negative (annealed) ELBO
    loss = -elbo + exp.config.l2_reg * T.nn.l2_regularization(params)

    # construct the train metrics
    ret = {
        'loss': loss,
        'kl': kl,
        'log p(x|z)': log_px_given_z,
        'log q(z|x)': log_qz_given_x,
        'log p(z)': log_pz,
    }
    if loop.epoch >= 100:
        # once beta == 1, the annealed objective is the true ELBO
        ret['elbo'] = elbo
    return ret
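# NOTE: a tiny standalone illustration (not part of the training code) of
# the KL warm-up schedule used in `train_step`.
def _kl_warmup_beta(epoch, warmup_epochs=100):
    return min(epoch / float(warmup_epochs), 1.)

assert _kl_warmup_beta(0) == 0.0
assert _kl_warmup_beta(50) == 0.5
assert _kl_warmup_beta(150) == 1.0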
def test_xavier_initializer(self):
    for dtype, initializer, mode in product(
            float_dtypes,
            (tk.init.xavier_normal, tk.init.xavier_uniform),
            (None, 'fan_in', 'fan_out'),
    ):
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))
        mode_arg = {'mode': mode} if mode is not None else {}

        # xavier with fan_in/fan_out inferred from the variable shape;
        # the sample mean should lie within 5 standard errors of zero
        fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
        xavier_std = np.sqrt(2.0 / float(fan_in + fan_out))
        tk.init.apply_initializer(weight, initializer, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 * xavier_std / np.sqrt(n_samples))

        # xavier with custom gain and fan_in/fan_out
        fan_in, fan_out = 23, 17
        init_gain = 1.5
        xavier_std = init_gain * np.sqrt(2.0 / float(fan_in + fan_out))
        tk.init.apply_initializer(weight, initializer,
                                  fan_in_and_fan_out=(fan_in, fan_out),
                                  gain=init_gain, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 * xavier_std / np.sqrt(n_samples))
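# NOTE: a hedged NumPy sketch of Xavier (Glorot) initialization as tested
# above, under the usual definition std = gain * sqrt(2 / (fan_in + fan_out));
# `_xavier_normal_ref` is a hypothetical reference, not the tk.init API.
def _xavier_normal_ref(shape, fan_in, fan_out, gain=1.0):
    std = gain * np.sqrt(2.0 / float(fan_in + fan_out))
    return np.random.normal(0.0, std, size=shape)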
def test_elbo(self):
    log_p, log_q = prepare_test_payload()

    obj = elbo_objective(log_p, log_q)
    assert_allclose(
        T.reduce_mean(obj),
        elbo_objective(log_p, log_q, reduction='mean')
    )
    assert_allclose(
        T.reduce_sum(obj),
        elbo_objective(log_p, log_q, reduction='sum')
    )
    obj_shape = T.shape(obj)
    assert_allclose(obj, log_p - log_q)

    obj_r = elbo_objective(log_p, log_q, axis=[0])
    self.assertListEqual(obj_shape[1:], T.shape(obj_r))
    assert_allclose(obj_r, T.reduce_mean(log_p - log_q, axis=[0]))

    obj_rk = elbo_objective(log_p, log_q, axis=[0], keepdims=True)
    assert_allclose(
        T.reduce_mean(obj_rk),
        elbo_objective(log_p, log_q, axis=[0], keepdims=True,
                       reduction='mean')
    )
    assert_allclose(
        T.reduce_sum(obj_rk),
        elbo_objective(log_p, log_q, axis=[0], keepdims=True,
                       reduction='sum')
    )
    self.assertListEqual([1] + obj_shape[1:], T.shape(obj_rk))
    assert_allclose(
        obj_rk,
        T.reduce_mean(log_p - log_q, axis=[0], keepdims=True)
    )
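# NOTE: a minimal sketch of the quantity checked above: the per-sample ELBO
# term is simply log_p - log_q, and reducing over the sampling axis takes
# its Monte Carlo average.
def _elbo_objective_ref(log_p, log_q, axis=None, keepdims=False):
    obj = log_p - log_q
    if axis is None:
        return obj
    return np.mean(obj, axis=axis, keepdims=keepdims)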
def eval_step(x, n_z=exp.config.test_n_z):
    with tk.layers.scoped_eval_mode(vae), T.no_grad():
        chain = vae.get_chain(x, n_z=n_z)
        log_qz_given_x = T.reduce_mean(chain.q['z'].log_prob())
        log_pz = T.reduce_mean(chain.p['z'].log_prob())
        log_px_given_z = T.reduce_mean(chain.p['x'].log_prob())
        kl = log_pz - log_qz_given_x
        elbo = log_px_given_z + kl
        nll = -chain.vi.evaluation.is_loglikelihood(reduction='mean')
        return {
            'elbo': elbo,
            'nll': nll,
            'kl': kl,
            'log p(x|z)': log_px_given_z,
            'log q(z|x)': log_qz_given_x,
            'log p(z)': log_pz,
        }
def test_importance_sampling_log_likelihood(self):
    assert_allclose_ = functools.partial(assert_allclose,
                                         rtol=1e-5, atol=1e-6)
    log_p, log_q = prepare_test_payload()

    ll = importance_sampling_log_likelihood(log_p, log_q, axis=[0])
    ll_shape = T.shape(ll)
    assert_allclose_(ll, T.log_mean_exp(log_p - log_q, axis=[0]))
    assert_allclose_(
        T.reduce_mean(ll),
        importance_sampling_log_likelihood(log_p, log_q, axis=[0],
                                           reduction='mean'))
    assert_allclose_(
        T.reduce_sum(ll),
        importance_sampling_log_likelihood(log_p, log_q, axis=[0],
                                           reduction='sum'))

    ll_k = importance_sampling_log_likelihood(log_p, log_q, axis=[0],
                                              keepdims=True)
    self.assertListEqual([1] + ll_shape, T.shape(ll_k))
    assert_allclose_(
        ll_k, T.log_mean_exp(log_p - log_q, axis=[0], keepdims=True))
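# NOTE: `importance_sampling_log_likelihood` has the same log-mean-exp form
# as `monte_carlo_objective` (see `_monte_carlo_objective_ref` above); it is
# the standard importance-sampling estimate of log p(x), consistent as the
# number of z samples grows.  A tiny worked check:
_lp = np.log(np.array([[0.2], [0.4]]))
_lq = np.log(np.array([[0.5], [0.5]]))
# importance weights p/q are 0.4 and 0.8, so log-mean-exp gives log(0.6)
assert np.allclose(_monte_carlo_objective_ref(_lp, _lq), np.log(0.6))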
def test_kaming_initializer(self):
    for dtype, initializer, mode in product(
            float_dtypes,
            (tk.init.kaming_normal, tk.init.kaming_uniform),
            (None, 'fan_in', 'fan_out'),
    ):
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))
        mode_arg = {'mode': mode} if mode is not None else {}

        # kaming with fan_in/fan_out inferred from the variable shape;
        # the sample mean should lie within 5 standard errors of zero
        fan_in, fan_out = tk.init.calculate_fan_in_and_fan_out(weight)
        if mode == 'fan_out':
            kaming_std = np.sqrt(1.0 / np.sqrt(fan_out))
        else:
            kaming_std = np.sqrt(1.0 / np.sqrt(fan_in))
        tk.init.apply_initializer(weight, initializer, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 * kaming_std / np.sqrt(n_samples))

        # kaming with custom gain and fan_in/fan_out
        fan_in, fan_out = 23, 17
        init_gain = 1.5
        if mode == 'fan_out':
            kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_out))
        else:
            kaming_std = init_gain * np.sqrt(1.0 / np.sqrt(fan_in))
        tk.init.apply_initializer(weight, initializer,
                                  fan_in_and_fan_out=(fan_in, fan_out),
                                  gain=init_gain, **mode_arg)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 * kaming_std / np.sqrt(n_samples))

        # test error
        with pytest.raises(
                ValueError,
                match='`mode` must be either "fan_in" or "fan_out"'):
            weight = T.variable([n_samples // 50, 50], dtype=dtype,
                                initializer=0.)
            tk.init.apply_initializer(weight, initializer, mode='invalid')
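# NOTE: a hedged NumPy sketch of Kaiming-style initialization, under the
# common textbook definition std = gain / sqrt(fan_mode) from He et al.
# (2015); `_kaming_normal_ref` is a hypothetical reference, and the exact
# std convention of tk.init may differ.
def _kaming_normal_ref(shape, fan_in, fan_out, mode='fan_in', gain=1.0):
    if mode not in ('fan_in', 'fan_out'):
        raise ValueError('`mode` must be either "fan_in" or "fan_out"')
    fan = fan_in if mode == 'fan_in' else fan_out
    return np.random.normal(0.0, gain / np.sqrt(fan), size=shape)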
def test_normal(self):
    for dtype in float_dtypes:
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))

        # normal with default args
        tk.init.apply_initializer(weight, tk.init.normal)
        self.assertLessEqual(np.abs(T.to_numpy(T.reduce_mean(weight))),
                             5.0 / np.sqrt(n_samples))

        # normal with customized args
        tk.init.apply_initializer(
            weight, partial(tk.init.normal, mean=1., std=3.))
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - 1.),
            5.0 * 3. / np.sqrt(n_samples))
def test_uniform(self):
    for dtype in float_dtypes:
        weight = T.variable([n_samples // 50, 50], dtype=dtype,
                            initializer=0.)
        assert_equal(weight, T.full_like(weight, 0.))

        # uniform with default args: U(0, 1) has mean 0.5, std 1 / sqrt(12)
        tk.init.apply_initializer(weight, tk.init.uniform)
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - 0.5),
            5.0 / np.sqrt(12.) / np.sqrt(n_samples))

        # uniform with customized args: U(-4, 3) has mean -0.5,
        # std 7 / sqrt(12)
        tk.init.apply_initializer(
            weight, partial(tk.init.uniform, low=-4., high=3.))
        self.assertLessEqual(
            np.abs(T.to_numpy(T.reduce_mean(weight)) - (-0.5)),
            5.0 * 7.0 / np.sqrt(12.) / np.sqrt(n_samples))
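# NOTE: a short standalone illustration of the statistical check shared by
# the initializer tests above: for n i.i.d. draws with mean mu and standard
# deviation sigma, the sample mean deviates from mu by more than
# 5 * sigma / sqrt(n) only with negligible probability, so these assertions
# are effectively deterministic.
def _sample_mean_within_5_sigma(samples, mu, sigma):
    n = samples.size
    return abs(float(np.mean(samples)) - mu) <= 5.0 * sigma / np.sqrt(n)

assert _sample_mean_within_5_sigma(
    np.random.normal(1., 3., size=10000), mu=1., sigma=3.)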
def test_sgvb(self):
    assert_allclose_ = functools.partial(assert_allclose,
                                         rtol=1e-5, atol=1e-6)

    # default
    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    cost = sgvb_estimator(f)
    assert_allclose_(-cost, sgvb_estimator(f, negative=True))
    assert_allclose_(T.reduce_mean(cost),
                     sgvb_estimator(f, reduction='mean'))
    assert_allclose_(T.reduce_sum(cost),
                     sgvb_estimator(f, reduction='sum'))
    cost_shape = T.shape(cost)
    assert_allclose_(
        T.grad([T.reduce_sum(cost)], [y])[0],
        T.reduce_sum(2 * x * y * f, axis=[0]))

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    cost_r = sgvb_estimator(f, axis=[0])
    assert_allclose_(-cost_r, sgvb_estimator(f, axis=[0], negative=True))
    self.assertListEqual(cost_shape[1:], T.shape(cost_r))
    assert_allclose_(
        T.grad([T.reduce_sum(cost_r)], [y])[0],
        T.reduce_sum(2 * x * y * f, axis=[0]) / 7)

    x, y, z, f, log_f, log_q = prepare_test_payload(reparameterized=True)
    cost_rk = sgvb_estimator(f, axis=[0], keepdims=True)
    assert_allclose_(T.reduce_mean(cost_rk),
                     sgvb_estimator(f, axis=[0], reduction='mean'))
    assert_allclose_(T.reduce_sum(cost_rk),
                     sgvb_estimator(f, axis=[0], reduction='sum'))
    assert_allclose_(
        -cost_rk,
        sgvb_estimator(f, axis=[0], keepdims=True, negative=True))
    self.assertListEqual([1] + cost_shape[1:], T.shape(cost_rk))
    assert_allclose_(
        T.grad([T.reduce_sum(cost_rk)], [y])[0],
        T.reduce_sum(2 * x * y * f, axis=[0]) / 7)
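# NOTE: a minimal sketch of the SGVB (reparameterization) estimator checked
# above: with reparameterized samples, the surrogate loss is just the Monte
# Carlo average of f itself, and gradients flow through f; the `/ 7` in the
# expected gradients suggests a sampling axis of size 7 in the test payload.
def _sgvb_estimator_ref(f, axis=None, keepdims=False, negative=False):
    cost = f if axis is None else np.mean(f, axis=axis, keepdims=keepdims)
    return -cost if negative else cost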
def test_MeanAveraging(self):
    # step-wise check
    factory = tk.optim.WeightMeanAveraging

    def update_fn(old_val, new_val, num_updates):
        return (old_val * num_updates + new_val) / (num_updates + 1.)

    def get_fn(val, num_updates):
        return val

    stepwise_average_check(self, factory, update_fn, get_fn)

    # overall check
    input_x = T.random.randn([7, 4])
    full_scan_average_check(
        self, factory, input_x, T.reduce_mean(input_x, axis=[0]))
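# NOTE: a tiny standalone check of the incremental-mean recurrence used by
# `update_fn` above: (old * t + new) / (t + 1) after t updates equals the
# plain average of every value seen so far.
_vals = [2.0, 4.0, 9.0]
_avg = 0.0
for _t, _v in enumerate(_vals):
    _avg = (_avg * _t + _v) / (_t + 1.)
assert abs(_avg - sum(_vals) / len(_vals)) < 1e-12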
def calculate_acc(logits: T.Tensor, y: T.Tensor) -> T.Tensor:
    with T.no_grad():
        # predicted class = argmax over the last (class) axis
        out_y = T.argmax(logits, axis=-1)
        return T.reduce_mean(T.cast(T.equal(out_y, y), dtype=T.float32))
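# NOTE: an equivalent pure-NumPy reference for `calculate_acc`: take the
# argmax over the class axis and average the 0/1 match indicator.
def _calculate_acc_ref(logits, y):
    return float(np.mean(np.argmax(logits, axis=-1) == y))

# e.g. two samples, three classes; the second prediction is wrong:
_logits = np.array([[0.1, 2.0, -1.0], [3.0, 0.0, 0.5]])
assert _calculate_acc_ref(_logits, np.array([1, 2])) == 0.5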