def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
               debug=False):
    """Set hyperparameters and build the graph ops used by inference.

    All ops the algorithm needs are meant to be created from within
    ``initialize()`` rather than outside of it.

    Parameters
    ----------
    n_iter : int, optional
        Number of iterations for the algorithm.
    n_print : int, optional
        Number of iterations between progress prints. Specify 0 to
        suppress progress printing. Defaults to ``int(n_iter / 10)``.
    scale : dict of RandomVariable to tf.Tensor, optional
        Tensor used to scale computation for the random variable it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise with the random variable (e.g. mini-batch scaling
        of global variables, or masking a random variable).
    logdir : str, optional
        Directory handed to ``tf.summary.FileWriter``. When omitted,
        nothing is written.
    debug : bool, optional
        If True, ``NaN``/``Inf`` checks are added to the graph via
        ``tf.add_check_numerics_ops()``; this may slow execution down
        substantially.

    Raises
    ------
    TypeError
        If ``scale`` is neither None nor a dict.
    """
    self.n_iter = n_iter
    self.n_print = int(n_iter / 10) if n_print is None else n_print
    self.progbar = Progbar(self.n_iter)

    # In-graph iteration counter and the op that advances it by one.
    self.t = tf.Variable(0, trainable=False, name="iteration")
    self.increment_t = self.t.assign_add(1)

    if scale is None:
        scale = {}
    if not isinstance(scale, dict):
        raise TypeError("scale must be a dict object.")
    self.scale = scale

    # Summaries are only wired up when a log directory was given.
    self.logging = logdir is not None
    if self.logging:
        graph = tf.get_default_graph()
        self.train_writer = tf.summary.FileWriter(logdir, graph)
        self.summarize = tf.summary.merge_all()

    self.debug = debug
    if debug:
        self.op_check = tf.add_check_numerics_ops()
def main(_):
    """Train a VAE on MNIST mini-batches and save prior samples per epoch.

    The generative and inference networks are both built for a fixed
    mini-batch of ``FLAGS.M`` images; ``ed.KLqp`` ties them to the same
    placeholder. After every epoch the averaged loss is printed and one
    sample of ``x`` per batch slot is written to ``FLAGS.out_dir``.
    """
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (_x_test, _) = mnist(FLAGS.data_dir)
    batches = generator(x_train, FLAGS.M)

    # MODEL
    # Subgraph of the full model for one mini-batch of size M.
    latent_shape = [FLAGS.M, FLAGS.d]
    z = Normal(loc=tf.zeros(latent_shape), scale=tf.ones(latent_shape))
    decoder_hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    pixel_logits = tf.layers.dense(decoder_hidden, 28 * 28)
    x = Bernoulli(logits=pixel_logits)

    # INFERENCE
    # Subgraph of the variational model for one mini-batch of size M.
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    encoder_hidden = tf.layers.dense(
        tf.cast(x_ph, tf.float32), 256, activation=tf.nn.relu)
    q_loc = tf.layers.dense(encoder_hidden, FLAGS.d)
    q_scale = tf.layers.dense(
        encoder_hidden, FLAGS.d, activation=tf.nn.softplus)
    qz = Normal(loc=q_loc, scale=q_scale)

    # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    inference.initialize(
        optimizer=tf.train.RMSPropOptimizer(0.01, epsilon=1.0))

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    image_path = os.path.join(FLAGS.out_dir, '%d.png')
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        progress = Progbar(n_iter_per_epoch)
        for step in range(1, n_iter_per_epoch + 1):
            progress.update(step)
            feed = {x_ph: next(batches)}
            avg_loss += inference.update(feed_dict=feed)['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Prior predictive check.
        samples = x.eval()
        for idx in range(FLAGS.M):
            imsave(image_path % idx, samples[idx].reshape(28, 28))
def main(_):
    """Train a VAE on MNIST and dump decoder outputs after each epoch.

    ``generative_network`` maps latent codes to pixel logits and
    ``inference_network`` maps images to the parameters of ``q(z | x)``.
    After each epoch the averaged loss is printed and the sigmoid of the
    logits is saved as one image per batch slot under ``FLAGS.out_dir``.
    """
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (_x_test, _) = mnist(FLAGS.data_dir)
    batches = generator(x_train, FLAGS.M)

    # MODEL
    latent_shape = [FLAGS.M, FLAGS.d]
    z = Normal(loc=tf.zeros(latent_shape), scale=tf.ones(latent_shape))
    logits = generative_network(z)
    x = Bernoulli(logits=logits)

    # INFERENCE
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    q_loc, q_scale = inference_network(tf.cast(x_ph, tf.float32))
    qz = Normal(loc=q_loc, scale=q_scale)

    # Bind p(x, z) and q(z | x) to the same placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    inference.initialize(
        optimizer=tf.train.AdamOptimizer(0.01, epsilon=1.0))

    # Built after initialize(), as in the original graph construction order.
    hidden_rep = tf.sigmoid(logits)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    image_path = os.path.join(FLAGS.out_dir, '%d.png')
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        progress = Progbar(n_iter_per_epoch)
        for step in range(1, n_iter_per_epoch + 1):
            progress.update(step)
            feed = {x_ph: next(batches)}
            avg_loss += inference.update(feed_dict=feed)['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Visualize hidden representations.
        rendered = hidden_rep.eval()
        for idx in range(FLAGS.M):
            imsave(image_path % idx, rendered[idx].reshape(28, 28))
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False):
    """Initialize inference algorithm.

    It initializes hyperparameters and builds ops for the algorithm's
    computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, then specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool, optional.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is only
        applied if the argument is `True`, the latent variable pair are
        `ed.RandomVariable`s with the `support` attribute, the supports
        are both continuous and unequal.
      logdir: str, optional.
        Directory where event file will be written. A leading `~` is
        expanded to the user's home directory. For details, see
        `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations in
        the graph. May result in substantially slower execution times.

    Raises:
      TypeError: if `scale` is neither None nor a dict.
    """
    self.n_iter = n_iter
    if n_print is None:
        self.n_print = int(n_iter / 100)
    else:
        self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")
    self.increment_t = self.t.assign_add(1)

    if scale is None:
        scale = {}
    elif not isinstance(scale, dict):
        raise TypeError("scale must be a dict object.")
    self.scale = scale

    # map from original latent vars to unconstrained versions
    self.transformations = {}
    if auto_transform:
        latent_vars = self.latent_vars.copy()
        # latent_vars maps original latent vars to constrained Q's.
        # latent_vars_unconstrained maps unconstrained vars to
        # unconstrained Q's.
        self.latent_vars = {}
        self.latent_vars_unconstrained = {}
        for z, qz in six.iteritems(latent_vars):
            # NOTE(review): 'point' here and "points" below are two
            # different support strings -- confirm both are intended.
            if hasattr(z, 'support') and hasattr(qz, 'support') and \
                    z.support != qz.support and qz.support != 'point':

                # transform z to an unconstrained space
                z_unconstrained = transform(z)
                self.transformations[z] = z_unconstrained

                # make sure we also have a qz that covers the
                # unconstrained space
                if qz.support == "points":
                    qz_unconstrained = qz
                else:
                    qz_unconstrained = transform(qz)
                self.latent_vars_unconstrained[
                    z_unconstrained] = qz_unconstrained

                # additionally construct the transformation of qz
                # back into the original constrained space
                if z_unconstrained != z:
                    qz_constrained = transform(
                        qz_unconstrained,
                        bijectors.Invert(z_unconstrained.bijector))

                    try:
                        # attempt to pushforward the params of Empirical
                        # distributions
                        qz_constrained.params = \
                            z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                    except Exception:
                        # qz_unconstrained is not an Empirical
                        # distribution. Still best-effort, but no longer
                        # swallows KeyboardInterrupt / SystemExit the way
                        # a bare ``except:`` did.
                        pass
                else:
                    qz_constrained = qz_unconstrained

                self.latent_vars[z] = qz_constrained
            else:
                self.latent_vars[z] = qz
                self.latent_vars_unconstrained[z] = qz
        del latent_vars

    if logdir is not None:
        self.logging = True
        # Expand "~" whether or not a timestamp subdirectory is used, so
        # "~/logs" never ends up as a literal "~" directory.
        logdir = os.path.expanduser(logdir)
        if log_timestamp:
            logdir = os.path.join(
                logdir,
                datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

        self._summary_key = tf.get_default_graph().unique_name("summaries")
        self._set_log_variables(log_vars)
        self.train_writer = tf.summary.FileWriter(logdir,
                                                  tf.get_default_graph())
    else:
        self.logging = False

    self.debug = debug
    if self.debug:
        self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]
data = {x: x_ph} inference = ed.ReparameterizationKLKLqp({z: qz}, data) optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0) inference.initialize(optimizer=optimizer, use_prettytensor=True) hidden_rep = tf.sigmoid(logits) init = tf.global_variables_initializer() init.run() n_epoch = 100 n_iter_per_epoch = 1000 for epoch in range(n_epoch): avg_loss = 0.0 pbar = Progbar(n_iter_per_epoch) for t in range(1, n_iter_per_epoch + 1): pbar.update(t) x_train, _ = mnist.train.next_batch(M) info_dict = inference.update(feed_dict={x_ph: x_train}) avg_loss += info_dict['loss'] # Print a lower bound to the average marginal likelihood for an # image. avg_loss = avg_loss / n_iter_per_epoch avg_loss = avg_loss / M print("log p(x) >= {:0.3f}".format(avg_loss)) # Visualize hidden representations. imgs = hidden_rep.eval() for m in range(M):
def initialize(self, n_iter=1000, n_print=None, scale=None, logdir=None,
               log_timestamp=True, log_vars=None, debug=False):
    """Initialize inference algorithm.

    It initializes hyperparameters and builds ops for the algorithm's
    computation graph.

    Any derived class of ``Inference`` **must** implement this method.
    No methods which build ops should be called outside ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
        Number of iterations for algorithm.
    n_print : int, optional
        Number of iterations for each print progress. To suppress print
        progress, then specify 0. Default is ``int(n_iter / 100)``.
    scale : dict of RandomVariable to tf.Tensor, optional
        A tensor to scale computation for any random variable that it is
        bound to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
    logdir : str, optional
        Directory where event file will be written. A leading ``~`` is
        expanded to the user's home directory. For details, see
        ``tf.summary.FileWriter``. Default is to log nothing.
    log_timestamp : bool, optional
        If True (and ``logdir`` is specified), create a subdirectory of
        ``logdir`` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars : list, optional
        Specifies the list of variables to log after each ``n_print``
        steps. If None, will log all variables. If ``[]``, no variables
        will be logged. ``logdir`` must be specified for variables to be
        logged.
    debug : bool, optional
        If True, add checks for ``NaN`` and ``Inf`` to all computations
        in the graph. May result in substantially slower execution times.

    Raises
    ------
    TypeError
        If ``scale`` is neither None nor a dict.
    """
    self.n_iter = n_iter
    if n_print is None:
        self.n_print = int(n_iter / 100)
    else:
        self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")
    self.increment_t = self.t.assign_add(1)

    if scale is None:
        scale = {}
    elif not isinstance(scale, dict):
        raise TypeError("scale must be a dict object.")
    self.scale = scale

    if logdir is not None:
        self.logging = True
        # Without this, a path such as "~/logs" is taken literally and a
        # directory named "~" is created under the working directory.
        logdir = os.path.expanduser(logdir)
        if log_timestamp:
            logdir = os.path.join(
                logdir,
                datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

        self._set_log_variables(log_vars)
        self.train_writer = tf.summary.FileWriter(logdir,
                                                  tf.get_default_graph())
        self.summarize = tf.summary.merge_all()
    else:
        self.logging = False

    self.debug = debug
    if self.debug:
        self.op_check = tf.add_check_numerics_ops()

    # Store reset ops which user can call. Subclasses should append
    # any ops needed to reset internal variables in inference.
    self.reset = [tf.variables_initializer([self.t])]
def main(_):
    """Fit a three-layer Gamma/Poisson model to NIPS word counts.

    Local variables ``z1``..``z3`` are fit with ``ed.KLqp`` and the weight
    matrices ``W0``..``W2`` with ``ed.MAP``; one update of each is run per
    iteration. After each epoch the averaged loss, the resulting
    perplexity and the top words of the first ten rows of ``qW0`` are
    printed.
    """
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents in 2011 and words appearing in at least two
    # documents and have a total word count of at least 10.
    doc_idx = [pos for pos, name in enumerate(documents)
               if name.startswith('2011')]
    documents = [documents[pos] for pos in doc_idx]
    x_train = x_train[:, doc_idx]

    in_enough_docs = np.sum(x_train != 0, 1) >= 2
    frequent_enough = np.sum(x_train, 1) >= 10
    word_idx = np.logical_and(in_enough_docs, frequent_enough)
    words = [token for token, keep in zip(words, word_idx) if keep]
    x_train = x_train[word_idx, :].T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    K = FLAGS.K
    W2 = Gamma(0.1, 0.3, sample_shape=[K[2], K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[K[1], K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)

    use_gamma = FLAGS.q == 'gamma'
    make_q = gamma_q if use_gamma else lognormal_q
    qz3 = make_q(z3.shape)
    qz2 = make_q(z2.shape)
    qz1 = make_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp(
        {z1: qz1, z2: qz2, z3: qz3},
        data={x: x_train, W0: qW0, W1: qW1, W2: qW2})
    inference_m = ed.MAP(
        {W0: qW0, W1: qW1, W2: qW2},
        data={x: x_train, z1: qz1, z2: qz2, z3: qz3})

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = dict(optimizer=optimizer_e,
                  n_print=100,
                  logdir=FLAGS.logdir,
                  log_timestamp=False)
    if use_gamma:
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        progress = Progbar(n_iter_per_epoch)
        for step in range(1, n_iter_per_epoch + 1):
            progress.update(step)
            e_info = inference_e.update()
            inference_m.update()
            nll += e_info['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for topic in range(10):
            ranked = qW0_vals[topic, :].argsort()[-10:][::-1]
            top_words = " ".join([words[w] for w in ranked])
            print("Topic {}: {}".format(topic, top_words))
def main(_):
    """Train a stack of Bernoulli layers on Caltech-101 silhouettes.

    The model is built top-down from ``FLAGS.hidden_sizes`` and the
    variational model bottom-up from the data placeholder. After each
    epoch this prints the averaged training loss and a test negative
    log-likelihood averaged over ``FLAGS.n_test_samples`` runs, then
    writes one sample of ``x`` per batch slot under ``FLAGS.out_dir``.
    """
    ed.set_seed(42)

    # DATA
    (x_train, _), (x_test, _), (x_valid, _) = caltech101_silhouettes(
        FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.batch_size)
    x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

    # MODEL
    zs = [0] * len(FLAGS.hidden_sizes)
    for l in reversed(range(len(FLAGS.hidden_sizes))):
        if l == len(FLAGS.hidden_sizes) - 1:
            # Top layer: zero logits, batch dimension taken from x_ph.
            logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
        else:
            logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        zs[l] = Bernoulli(logits=logits)

    x = Bernoulli(logits=tf.layers.dense(
        tf.cast(zs[0], tf.float32), 28 * 28, activation=None))

    # INFERENCE
    # Define variational model with reverse ordering as probability model:
    # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
    qzs = [0] * len(FLAGS.hidden_sizes)
    for l in range(len(FLAGS.hidden_sizes)):
        if l == 0:
            logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        else:
            logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                                     FLAGS.hidden_sizes[l], activation=None)
        qzs[l] = Bernoulli(logits=logits)

    inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
    inference.initialize(optimizer=optimizer,
                         n_samples=FLAGS.n_train_samples)

    # Build tensor for log-likelihood given one variational sample to run
    # on test data.
    x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
    x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                      tf.cast(tf.shape(x_ph)[0], tf.float32))

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for epoch in range(FLAGS.n_epoch):
        print("Epoch {}".format(epoch))
        train_loss = 0.0

        pbar = Progbar(FLAGS.n_iter_per_epoch)
        for t in range(1, FLAGS.n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            train_loss += info_dict['loss']

        # Print per-data point loss, averaged over training epoch.
        train_loss /= FLAGS.n_iter_per_epoch
        train_loss /= FLAGS.batch_size
        print("Training negative log-likelihood: {:0.3f}".format(train_loss))

        test_loss = [
            sess.run(x_neg_log_prob, {x_ph: x_test})
            for _ in range(FLAGS.n_test_samples)
        ]
        test_loss = np.mean(test_loss)
        print("Test negative log-likelihood: {:0.3f}".format(test_loss))

        # Prior predictive check.
        # feed ph to determine sample size
        images = sess.run(x, {x_ph: x_batch})
        for m in range(FLAGS.batch_size):
            # The bare name `out_dir` is not defined in this function; all
            # other settings here come from FLAGS.
            # NOTE(review): assumes an `out_dir` flag is declared -- confirm.
            imsave("{}/{}.png".format(FLAGS.out_dir, m),
                   images[m].reshape(28, 28))
def initialize(self, n_iter=1000, n_print=None, scale=None,
               auto_transform=True, logdir=None, log_timestamp=True,
               log_vars=None, debug=False, optimizer=None, var_list=None,
               use_prettytensor=False, global_step=None, n_samples=1,
               kl_scaling=None, maxnorm=5.):
    """Initialize inference: hyperparameters, loss, clipping and train op.

    Combines the generic inference setup (iteration counter, optional
    auto-transform of latent variables, logging, debug checks) with the
    variational-inference setup (variable collection, loss and gradients,
    per-variable gradient clipping, optimizer construction).

    Args:
      n_iter: int, optional.
        Number of iterations; also sizes the progress bar.
      n_print: int, optional.
        Number of iterations for each print progress. Default is
        `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        Stored on `self.scale`; must be a dict when given.
      auto_transform: bool, optional.
        If True, latent variable pairs whose `support` attributes differ
        (and whose approximation's support is not 'point') are rewritten
        through `transform`.
      logdir: str, optional.
        Directory for `tf.summary.FileWriter`. A leading `~` is expanded.
        Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), log into a subdirectory
        named after the current UTC time, format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Passed to `self._set_log_variables` when logging is enabled.
      debug: bool, optional.
        If True, add `NaN`/`Inf` checks via
        `tf.add_check_numerics_ops()`.
      optimizer: str, tf.train.Optimizer or None, optional.
        One of 'gradientdescent', 'adadelta', 'adagrad', 'momentum',
        'adam', 'ftrl', 'rmsprop', an optimizer instance, or None (Adam).
      var_list: list of tf.Variable, optional.
        Variables to optimize. If None, trainable variables reachable
        from the latent variables and from observed random variables in
        `self.data` are collected via `get_variables`.
      use_prettytensor: bool, optional.
        If True, build the train op with `prettytensor.apply_optimizer`.
        NOTE(review): that path receives the loss, not the clipped
        gradients, so `maxnorm` appears to have no effect there --
        confirm this is intended.
      global_step: tf.Variable, optional.
        Step counter passed to the optimizer. When it is a
        `tf.Variable`, the learning rate is an exponential decay
        starting at 0.1; otherwise it is 0.01.
      n_samples: int, optional.
        Stored on `self.n_samples`; must be positive.
      kl_scaling: dict, optional.
        Stored on `self.kl_scaling`; defaults to an empty dict.
      maxnorm: float, optional.
        Clipping norm (along axis 0) applied to gradients of variables
        whose name contains "kernel" or "bias".

    Raises:
      ValueError: if `n_samples` is not positive, or `optimizer` is an
        unrecognized string.
      TypeError: if `scale` is not a dict, or `optimizer` has an
        unsupported type.
    """
    if kl_scaling is None:
        kl_scaling = {}
    if n_samples <= 0:
        raise ValueError(
            "n_samples should be greater than zero: {}".format(n_samples))
    self.n_samples = n_samples
    self.kl_scaling = kl_scaling

    # from inference.py
    self.n_iter = n_iter
    if n_print is None:
        self.n_print = int(n_iter / 100)
    else:
        self.n_print = n_print

    self.progbar = Progbar(self.n_iter)
    self.t = tf.Variable(0, trainable=False, name="iteration")
    self.increment_t = self.t.assign_add(1)

    if scale is None:
        scale = {}
    elif not isinstance(scale, dict):
        raise TypeError("scale must be a dict object.")
    self.scale = scale

    # Map from original latent vars to their unconstrained versions.
    self.transformations = {}
    if auto_transform:
        latent_vars = self.latent_vars.copy()
        self.latent_vars = {}
        self.latent_vars_unconstrained = {}
        for z, qz in six.iteritems(latent_vars):
            if hasattr(z, 'support') and hasattr(qz, 'support') and \
                    z.support != qz.support and qz.support != 'point':
                z_unconstrained = transform(z)
                self.transformations[z] = z_unconstrained

                if qz.support == "points":
                    qz_unconstrained = qz
                else:
                    qz_unconstrained = transform(qz)
                self.latent_vars_unconstrained[
                    z_unconstrained] = qz_unconstrained

                if z_unconstrained != z:
                    qz_constrained = transform(
                        qz_unconstrained,
                        bijectors.Invert(z_unconstrained.bijector))
                    try:
                        # Best-effort push-forward of `params`, when the
                        # approximation has that attribute.
                        qz_constrained.params = \
                            z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                    except Exception:
                        # Narrowed from a bare ``except:`` so that
                        # KeyboardInterrupt / SystemExit propagate.
                        pass
                else:
                    qz_constrained = qz_unconstrained

                self.latent_vars[z] = qz_constrained
            else:
                self.latent_vars[z] = qz
                self.latent_vars_unconstrained[z] = qz
        del latent_vars

    if logdir is not None:
        self.logging = True
        # Expand "~" regardless of log_timestamp.
        logdir = os.path.expanduser(logdir)
        if log_timestamp:
            logdir = os.path.join(
                logdir,
                datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

        self._summary_key = tf.get_default_graph().unique_name("summaries")
        self._set_log_variables(log_vars)
        self.train_writer = tf.summary.FileWriter(logdir,
                                                  tf.get_default_graph())
    else:
        self.logging = False

    self.debug = debug
    if self.debug:
        self.op_check = tf.add_check_numerics_ops()

    self.reset = [tf.variables_initializer([self.t])]

    # from variational_inference.py
    if var_list is None:
        var_list = set()
        trainables = tf.trainable_variables()
        for z, qz in six.iteritems(self.latent_vars):
            var_list.update(get_variables(z, collection=trainables))
            var_list.update(get_variables(qz, collection=trainables))

        for x, qx in six.iteritems(self.data):
            if isinstance(x, RandomVariable) and \
                    not isinstance(qx, RandomVariable):
                var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

    self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

    # Clip only "kernel"/"bias" variables. A gradient may be None when a
    # variable does not influence the loss; pass it through untouched
    # instead of handing None to tf.clip_by_norm.
    clipped_grads_and_vars = []
    for grad, var in grads_and_vars:
        clip = grad is not None and \
            ("kernel" in var.name or "bias" in var.name)
        if clip:
            grad = tf.clip_by_norm(grad, maxnorm, axes=[0])
        clipped_grads_and_vars.append((grad, var))
    del grads_and_vars

    if self.logging:
        tf.summary.scalar("loss", self.loss,
                          collections=[self._summary_key])
        for grad, var in clipped_grads_and_vars:
            if grad is None:
                # Nothing to summarize for a missing gradient.
                continue
            tag = var.name.replace(':', '/')
            tf.summary.histogram("gradient/" + tag, grad,
                                 collections=[self._summary_key])
            tf.summary.scalar("gradient_norm/" + tag, tf.norm(grad),
                              collections=[self._summary_key])

        self.summarize = tf.summary.merge_all(key=self._summary_key)

    if optimizer is None and global_step is None:
        # Default optimizer uses an exponentially decaying learning rate.
        global_step = tf.Variable(0, trainable=False, name="global_step")

    if isinstance(global_step, tf.Variable):
        starter_learning_rate = 0.1
        learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                   global_step,
                                                   100, 0.9, staircase=True)
    else:
        learning_rate = 0.01

    # Build optimizer.
    if optimizer is None:
        optimizer = tf.train.AdamOptimizer(learning_rate)
    elif isinstance(optimizer, str):
        if optimizer == 'gradientdescent':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        elif optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate)
        elif optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(learning_rate)
        elif optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        elif optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif optimizer == 'ftrl':
            optimizer = tf.train.FtrlOptimizer(learning_rate)
        elif optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(learning_rate)
        else:
            # Format into one message; passing two args rendered as a
            # tuple repr.
            raise ValueError(
                'Optimizer class not found: {}'.format(optimizer))
    elif not isinstance(optimizer, tf.train.Optimizer):
        raise TypeError(
            "Optimizer must be str, tf.train.Optimizer, or None.")

    with tf.variable_scope(None, default_name="optimizer") as scope:
        if not use_prettytensor:
            self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                   global_step=global_step)
        else:
            import prettytensor as pt
            self.train = pt.apply_optimizer(optimizer, losses=[self.loss],
                                            global_step=global_step,
                                            var_list=var_list)

    # Let reset() also re-initialize variables created under the
    # optimizer scope.
    self.reset.append(
        tf.variables_initializer(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                              scope=scope.name)))
def main(_):
    """Train a character-level language model on text8 with MAP.

    Each half epoch this prints the training bits per character, the
    average test negative log-likelihood per test sequence, and a few
    sequences generated by ``language_model_gen``.
    """
    ed.set_seed(42)

    # DATA
    x_train, _, x_test = text8(FLAGS.data_dir)
    vocab = string.ascii_lowercase + ' '
    vocab_size = len(vocab)
    encoder = dict(zip(vocab, range(vocab_size)))
    decoder = {v: k for k, v in encoder.items()}

    data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

    # MODEL
    x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
    with tf.variable_scope("language_model"):
        # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
        x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
        x = language_model(x_ph_shift, vocab_size)

    # Same variable scope with reuse=True for the generation graph.
    with tf.variable_scope("language_model", reuse=True):
        x_gen = language_model_gen(5, vocab_size)

    # Cut the test text into non-overlapping windows of `timesteps` chars.
    imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
    encoded_x_test = np.asarray(
        [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
        dtype=np.int32)
    test_size = encoded_x_test.shape[0]
    print("Test set shape: {}".format(encoded_x_test.shape))
    test_nll = -tf.reduce_sum(x.log_prob(x_ph))

    # INFERENCE
    inference = ed.MAP({}, {x: x_ph})

    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
    inference.initialize(optimizer=optimizer,
                         logdir=FLAGS.log_dir,
                         log_timestamp=False)

    print("Number of sets of parameters: {}".format(
        len(tf.trainable_variables())))
    print("Number of parameters: {}".format(
        np.sum([np.prod(v.shape.as_list())
                for v in tf.trainable_variables()])))
    for v in tf.trainable_variables():
        print(v)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    # Double n_epoch and print progress every half an epoch.
    n_iter_per_epoch = len(x_train) // (
        FLAGS.batch_size * FLAGS.timesteps * 2)
    epoch = 0.0
    for _ in range(FLAGS.n_epoch * 2):
        epoch += 0.5
        print("Epoch: {0}".format(epoch))
        avg_nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(data)
            info_dict = inference.update({x_ph: x_batch})
            avg_nll += info_dict['loss']

        # Print average bits per character over epoch.
        avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                    np.log(2))
        print("Train average bits/char: {:0.8f}".format(avg_nll))

        # Print per-data point log-likelihood on test set.
        # The bare name `batch_size` is not defined in this function; the
        # flag used everywhere else in it is FLAGS.batch_size.
        test_batch_size = FLAGS.batch_size
        avg_nll = 0.0
        for start in range(0, test_size, test_batch_size):
            end = min(test_size, start + test_batch_size)
            x_batch = encoded_x_test[start:end]
            avg_nll += sess.run(test_nll, {x_ph: x_batch})

        avg_nll /= test_size
        print("Test average NLL: {:0.8f}".format(avg_nll))

        # Generate samples from model.
        samples = sess.run(x_gen)
        samples = [''.join([decoder[c] for c in sample])
                   for sample in samples]
        print("Samples:")
        for sample in samples:
            print(sample)
### predictive check
# Draw replicated datasets from x_post, then compare the Poisson
# log-likelihood of the held-out data with that of the replicates under
# sampled U/V factors.
n_rep = 100  # number of replicated datasets we generate
holdout_gen = np.zeros((n_rep, x_train.shape[0], x_train.shape[1]))

for i in range(n_rep):
    x_generated = x_post.sample().eval()
    # look only at the heldout entries
    holdout_gen[i] = np.multiply(x_generated, holdout_mask)

n_eval = 10  # we draw samples from the inferred Z and W
obs_ll = []
rep_ll = []
pbar = Progbar(n_eval)
for j in range(n_eval):
    U_sample = U_post.sample().eval()
    V_sample = V_post.sample().eval()
    holdoutmean_sample = np.multiply(U_sample.dot(V_sample.T), holdout_mask)

    # Mask non-finite log-pmf values before averaging.
    obs_logpmf = stats.poisson.logpmf(
        np.array(x_vad, dtype=int), holdoutmean_sample)
    obs_ll.append(np.mean(np.ma.masked_invalid(obs_logpmf), axis=1))

    rep_logpmf = stats.poisson.logpmf(holdout_gen, holdoutmean_sample)
    rep_ll.append(np.mean(np.ma.masked_invalid(rep_logpmf), axis=2))