def evaluate_loop(dataset, FactorMuE_variational, latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
                  b0_scale, u_conc, r_conc, l_conc, z_distr, mc_samples, dtype,
                  writer):
    """Evaluate heldout log likelihood and perplexity."""
    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for x_batch, xlen_batch in dataset.batch(1):
        x = x_batch[0]
        xlen = xlen_batch[0]
        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qz, qbt, qb0, qu, qr, ql = FactorMuE_variational(x)
            # Forward pass.
            with tape() as model_tape:
                # Condition on variational sample.
                with condition(z=qz, bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                    posterior_predictive = FactorMuE(
                        latent_dims, latent_length, latent_alphabet_size,
                        alphabet_size, xlen, transfer_mats, bt_scale,
                        b0_scale, u_conc, r_conc, l_conc, z_distr=z_distr,
                        dtype=dtype)
            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                posterior_predictive.distribution, x, xlen) / mc_samples
        # Compute KL(vi posterior||prior) for local parameters.
        kl_local = 0.
        for rv_name, variational_rv in [("z", qz)]:
            kl_local += tf.reduce_sum(
                variational_rv.distribution.kl_divergence(
                    model_tape[rv_name].distribution))
        # Summary.
        local_elbo = log_likelihood - kl_local
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo / tf.cast(xlen * data_size, dtype)
    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)
    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)
    return heldout_perplex, heldout_likelihood
def evaluate_loop(dataset, RegressMuE_variational, latent_dims, latent_length,
                  latent_alphabet_size, alphabet_size, transfer_mats, bt_scale,
                  b0_scale, u_conc, r_conc, l_conc, mc_samples, dtype, writer):
    """Evaluate heldout log likelihood and perplexity."""
    data_size = sum(1 for i in dataset.batch(1))
    heldout_likelihood = 0.
    heldout_log_perplex = 0.
    for z_batch, x_batch, xlen_batch in dataset.batch(1):
        z = z_batch[0]
        x = x_batch[0]
        xlen = xlen_batch[0]
        log_likelihood = 0.
        for rep in range(mc_samples):
            # Sample from variational approximation.
            qbt, qb0, qu, qr, ql = RegressMuE_variational()
            # Condition on variational sample.
            with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                posterior_predictive = RegressMuE(
                    z, latent_dims, latent_length, latent_alphabet_size,
                    alphabet_size, xlen, transfer_mats, bt_scale, b0_scale,
                    u_conc, r_conc, l_conc, dtype=dtype)
            # Compute likelihood term.
            log_likelihood += mue.hmm_log_prob(
                posterior_predictive.distribution, x, xlen) / mc_samples
        # Summary.
        local_elbo = log_likelihood
        heldout_likelihood += local_elbo
        heldout_log_perplex -= local_elbo / tf.cast(xlen * data_size, dtype)
    # Compute perplexity.
    heldout_perplex = tf.exp(heldout_log_perplex)
    # Record.
    with writer.as_default():
        tf.summary.scalar('perplexity', heldout_perplex, step=1)
    return heldout_perplex, heldout_likelihood
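# The perplexity bookkeeping in both evaluate_loop variants exponentiates the
# average negative per-residue ELBO:
#     heldout_perplex = exp(-(1/N) * sum_i ELBO_i / L_i),
# where N is the number of heldout sequences and L_i is the length of
# sequence i. The helper below is a hypothetical plain-Python restatement of
# that aggregation, for illustration only; it is not part of the original
# module.
import math


def _perplexity_from_elbos(per_seq_elbos, seq_lengths):
    """Aggregate per-sequence ELBOs into a per-residue heldout perplexity."""
    n = len(per_seq_elbos)
    log_perplex = -sum(elbo / (length * n)
                       for elbo, length in zip(per_seq_elbos, seq_lengths))
    return math.exp(log_perplex)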
def test_hmm_log_prob():
    a0 = np.array([0.9, 0.08, 0.02])
    a = np.array([[0.1, 0.8, 0.1],
                  [0.5, 0.3, 0.2],
                  [0.4, 0.4, 0.2]])
    e = np.array([[0.99, 0.01],
                  [0.01, 0.99],
                  [0.5, 0.5]])
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(
            logits=tf.math.log(tf.convert_to_tensor(np.matmul(a0, a)))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))),
        5)
    x = tf.convert_to_tensor(
        np.array([[0., 1.],
                  [1., 0.],
                  [0., 1.],
                  [0., 1.],
                  [1., 0.],
                  [0.5, 0.5]]))
    xlen = tf.convert_to_tensor(5)
    chk_lp = mue.hmm_log_prob(model, x, xlen)

    f = np.matmul(a0, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    tst_lp = np.log(np.sum(f))

    assert np.allclose(chk_lp.numpy(), tst_lp)

    # Check against (predictably incorrect) tensorflow probability
    # implementation.
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a0))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))),
        5)
    xcat = tf.convert_to_tensor([1, 0, 1, 1, 0])
    tst_lp2 = model.log_prob(xcat)
    assert np.allclose(chk_lp.numpy(), tst_lp2.numpy())
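# The hand-rolled reference value in test_hmm_log_prob is the standard HMM
# forward algorithm, with the initial state distribution advanced one
# transition step (np.matmul(a0, a)) before the first emission. The helper
# below is a generic restatement of that recursion, assuming integer-coded
# observations; it is purely illustrative and not part of the test suite.
import numpy as np


def _forward_log_prob(a0, a, e, obs):
    """Log-likelihood of obs under an HMM via the forward recursion."""
    f = np.matmul(a0, a) * e[:, obs[0]]
    for o in obs[1:]:
        f = np.matmul(f, a) * e[:, o]
    return np.log(np.sum(f))


# With obs = [1, 0, 1, 1, 0] this reproduces tst_lp above.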
def train_loop(dataset, RegressMuE_variational, trainable_variables,
               latent_dims, latent_length, latent_alphabet_size,
               alphabet_size, transfer_mats, bt_scale, b0_scale, u_conc,
               r_conc, l_conc, max_epochs, shuffle_buffer, batch_size,
               optimizer_name, learning_rate, dtype, writer, out_folder):
    """Training loop."""
    # Set up the optimizer.
    optimizer = getattr(tf.keras.optimizers, optimizer_name)(
        learning_rate=learning_rate)

    data_size = sum(1 for i in dataset.batch(1))

    # Training loop.
    step = 0
    t0 = datetime.datetime.now()
    for epoch in range(max_epochs):
        for z_batch, x_batch, xlen_batch in dataset.shuffle(
                shuffle_buffer).batch(batch_size):
            step += 1
            accum_gradients = [tf.zeros(elem.shape, dtype=dtype)
                               for elem in trainable_variables]
            accum_elbo = 0.
            ix = -1
            for z, x, xlen in zip(z_batch, x_batch, xlen_batch):
                ix += 1
                # Track gradients.
                with tf.GradientTape() as gtape:
                    # Sample from variational approximation.
                    qbt, qb0, qu, qr, ql = RegressMuE_variational()
                    # Forward pass.
                    with tape() as model_tape:
                        # Condition on variational sample.
                        with condition(bt=qbt, b0=qb0, u=qu, r=qr, l=ql):
                            posterior_predictive = RegressMuE(
                                z, latent_dims, latent_length,
                                latent_alphabet_size, alphabet_size, xlen,
                                transfer_mats, bt_scale, b0_scale, u_conc,
                                r_conc, l_conc, dtype=dtype)
                    # Compute likelihood term.
                    log_likelihood = mue.hmm_log_prob(
                        posterior_predictive.distribution, x, xlen)
                    # Compute KL(vi posterior||prior) for global parameters.
                    kl_global = 0.
                    if ix == x_batch.shape[0] - 1:
                        for rv_name, variational_rv in [
                                ("bt", qbt), ("b0", qb0), ("u", qu),
                                ("r", qr), ("l", ql)]:
                            kl_global += tf.reduce_sum(
                                variational_rv.distribution.kl_divergence(
                                    model_tape[rv_name].distribution))
                    # Compute the ELBO term, correcting for subsampling.
                    elbo = ((data_size / x_batch.shape[0]) * log_likelihood
                            - kl_global)
                    # Compute gradient.
                    loss = -elbo
                gradients = gtape.gradient(loss, trainable_variables)
                # Accumulate elbo and gradients.
                accum_elbo += elbo
                for gi, grad in enumerate(gradients):
                    if grad is not None:
                        accum_gradients[gi] += grad
            # Record.
            with writer.as_default():
                tf.summary.scalar('elbo', accum_elbo, step=step)
            # Optimization step.
            optimizer.apply_gradients(
                zip(accum_gradients, trainable_variables))
        print('epoch {} ({})'.format(epoch, datetime.datetime.now() - t0))
    return RegressMuE_variational, trainable_variables
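# train_loop accumulates per-example gradients across a minibatch and applies
# a single optimizer step, scaling the likelihood by data_size / batch_size so
# the stochastic ELBO is corrected for subsampling. The function below is a
# minimal, self-contained illustration of that accumulate-then-apply pattern
# with a toy quadratic loss; the names and loss are assumptions made for
# illustration only, not the RegressMuE model itself.
import tensorflow as tf


def _toy_accumulated_step(optimizer, variables, examples, data_size):
    """Accumulate per-example gradients, then apply one optimizer update."""
    accum = [tf.zeros_like(v) for v in variables]
    for x in examples:
        with tf.GradientTape() as gtape:
            # Toy per-example loss standing in for -ELBO, with the same
            # data_size / batch_size subsampling correction.
            loss = (data_size / len(examples)) * tf.reduce_sum(
                (variables[0] - x) ** 2)
        grads = gtape.gradient(loss, variables)
        accum = [a + (g if g is not None else tf.zeros_like(a))
                 for a, g in zip(accum, grads)]
    optimizer.apply_gradients(zip(accum, variables))


# Example usage (hypothetical):
#   v = tf.Variable(tf.zeros([3]))
#   opt = tf.keras.optimizers.Adam(learning_rate=0.1)
#   _toy_accumulated_step(opt, [v], [tf.ones([3]), 2. * tf.ones([3])],
#                         data_size=10)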