Example #1
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computational graph. No ops
    should be created outside the call to ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations between progress reports. To suppress
      progress reports, specify 0. Default is ``int(n_iter / 10)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it
      is bound to. Its shape must be broadcastable; it is multiplied
      element-wise with the random variable. For example, this is
      useful for mini-batch scaling when inferring global variables,
      or for applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to write nothing.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 10)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()
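The `scale` argument documented above is what makes mini-batch training unbiased: scaling a subsampled likelihood by N/M recovers, in expectation, the full-data objective. A minimal sketch of its use (N, M, and the toy model are illustrative, not from the original source):

import edward as ed
import tensorflow as tf
from edward.models import Normal

N = 10000  # total data set size (illustrative)
M = 128    # mini-batch size (illustrative)

# Global latent variable shared across all observations.
z = Normal(loc=0.0, scale=1.0)
# Likelihood over a mini-batch of M observations.
x = Normal(loc=tf.ones(M) * z, scale=1.0)

qz = Normal(loc=tf.get_variable("qz/loc", []),
            scale=tf.nn.softplus(tf.get_variable("qz/scale", [])))

x_ph = tf.placeholder(tf.float32, [M])
inference = ed.KLqp({z: qz}, data={x: x_ph})
# Scale the mini-batch likelihood by N / M so the stochastic objective
# is an unbiased estimate of the full-data objective.
inference.initialize(scale={x: float(N) / M})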
Example #2
def main(_):
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    # Define a subgraph of the full model, corresponding to a minibatch of
    # size M.
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    x = Bernoulli(logits=tf.layers.dense(hidden, 28 * 28))

    # INFERENCE
    # Define a subgraph of the variational model, corresponding to a
    # minibatch of size M.
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    hidden = tf.layers.dense(tf.cast(x_ph, tf.float32),
                             256,
                             activation=tf.nn.relu)
    qz = Normal(loc=tf.layers.dense(hidden, FLAGS.d),
                scale=tf.layers.dense(hidden,
                                      FLAGS.d,
                                      activation=tf.nn.softplus))

    # Bind p(x, z) and q(z | x) to the same TensorFlow placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.RMSPropOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Prior predictive check.
        images = x.eval()
        for m in range(FLAGS.M):
            imsave(
                os.path.join(FLAGS.out_dir, '%d.png') % m,
                images[m].reshape(28, 28))
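The `generator` helper consumed above is not shown in this example. A plausible minimal sketch, assuming pixel values in [0, 255] and the stochastic binarization implied by the `tf.int32` placeholder (the original helper may differ):

import numpy as np

def generator(array, batch_size):
    # Cycle over the data indefinitely, yielding binarized mini-batches.
    while True:
        idx = np.random.permutation(array.shape[0])
        for start in range(0, array.shape[0] - batch_size + 1, batch_size):
            batch = array[idx[start:start + batch_size]] / 255.0
            yield np.random.binomial(1, batch)  # stochastic binarization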
Example #3
def main(_):
    ed.set_seed(42)

    # DATA. MNIST batches are fed at training time.
    (x_train, _), (x_test, _) = mnist(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.M)

    # MODEL
    z = Normal(loc=tf.zeros([FLAGS.M, FLAGS.d]),
               scale=tf.ones([FLAGS.M, FLAGS.d]))
    logits = generative_network(z)
    x = Bernoulli(logits=logits)

    # INFERENCE
    x_ph = tf.placeholder(tf.int32, [FLAGS.M, 28 * 28])
    loc, scale = inference_network(tf.cast(x_ph, tf.float32))
    qz = Normal(loc=loc, scale=scale)

    # Bind p(x, z) and q(z | x) to the same placeholder for x.
    inference = ed.KLqp({z: qz}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer)

    hidden_rep = tf.sigmoid(logits)

    tf.global_variables_initializer().run()

    n_iter_per_epoch = x_train.shape[0] // FLAGS.M
    for epoch in range(1, FLAGS.n_epoch + 1):
        print("Epoch: {0}".format(epoch))
        avg_loss = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            avg_loss += info_dict['loss']

        # Print a lower bound to the average marginal likelihood for an
        # image.
        avg_loss /= n_iter_per_epoch
        avg_loss /= FLAGS.M
        print("-log p(x) <= {:0.3f}".format(avg_loss))

        # Visualize hidden representations.
        images = hidden_rep.eval()
        for m in range(FLAGS.M):
            imsave(
                os.path.join(FLAGS.out_dir, '%d.png') % m,
                images[m].reshape(28, 28))
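`generative_network` and `inference_network` are elided in this example; sketches consistent with the inline layers of Example #2 (layer sizes are assumptions) might look like:

import tensorflow as tf

def generative_network(z):
    # Decoder: latent code -> Bernoulli logits over 28 * 28 pixels.
    hidden = tf.layers.dense(z, 256, activation=tf.nn.relu)
    return tf.layers.dense(hidden, 28 * 28)

def inference_network(x):
    # Encoder: image -> location and scale of q(z | x).
    hidden = tf.layers.dense(x, 256, activation=tf.nn.relu)
    loc = tf.layers.dense(hidden, FLAGS.d)
    scale = tf.layers.dense(hidden, FLAGS.d, activation=tf.nn.softplus)
    return loc, scale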
Example #4
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively, if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        the tracking information printed during progress.
      n_print: int, optional.
        Number of iterations between progress reports. To suppress
        progress reports, specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it
        is bound to. Its shape must be broadcastable; it is multiplied
        element-wise with the random variable. For example, this is
        useful for mini-batch scaling when inferring global variables,
        or for applying masks on a random variable.
      auto_transform: bool, optional.
        Whether to automatically transform continuous latent variables
        of unequal support to the unconstrained space. The transform
        is applied only if this argument is `True`, the latent-variable
        pair are `ed.RandomVariable`s with the `support` attribute, and
        the supports are both continuous and unequal.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        # map from original latent vars to unconstrained versions
        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            # latent_vars maps original latent vars to constrained Q's.
            # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    # transform z to an unconstrained space
                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    # make sure we also have a qz that covers the unconstrained space
                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    # additionally construct the transformation of qz
                    # back into the original constrained space
                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:  # attempt to pushforward the params of Empirical distributions
                            qz_constrained.params = z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                        except Exception:  # qz_unconstrained is not an Empirical distribution
                            pass

                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
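To see what the `auto_transform` machinery above buys you, here is a minimal usage sketch: a positive-support latent variable paired with a real-valued approximating family, which `initialize()` reconciles by running inference in the unconstrained space (the toy model is illustrative):

import edward as ed
import tensorflow as tf
from edward.models import Gamma, Normal

# z has positive support; qz lives on the whole real line.
z = Gamma(1.0, 1.0)
qz = Normal(loc=tf.get_variable("qz/loc", []),
            scale=tf.nn.softplus(tf.get_variable("qz/scale", [])))

# With auto_transform=True (the default), initialize() maps z to an
# unconstrained space, fits qz there, and transforms the result back.
inference = ed.KLqp({z: qz})
inference.initialize(auto_transform=True)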
Example #5
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Print a lower bound to the average marginal likelihood for an
    # image.
    avg_loss = avg_loss / n_iter_per_epoch
    avg_loss = avg_loss / M
    print("-log p(x) <= {:0.3f}".format(avg_loss))

    # Visualize hidden representations.
    imgs = hidden_rep.eval()
    for m in range(M):
        # (Loop body truncated in the original; the parallel examples save
        # each image out, e.g., with an assumed output directory:)
        imsave(os.path.join(out_dir, '%d.png') % m, imgs[m].reshape(28, 28))
Example #6
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of ``Inference`` **must** implement this method.
    No methods which build ops should be called outside ``initialize()``.

    Parameters
    ----------
    n_iter : int, optional
      Number of iterations for algorithm.
    n_print : int, optional
      Number of iterations between progress reports. To suppress
      progress reports, specify 0. Default is ``int(n_iter / 100)``.
    scale : dict of RandomVariable to tf.Tensor, optional
      A tensor to scale computation for any random variable that it
      is bound to. Its shape must be broadcastable; it is multiplied
      element-wise with the random variable. For example, this is
      useful for mini-batch scaling when inferring global variables,
      or for applying masks on a random variable.
    logdir : str, optional
      Directory where event file will be written. For details,
      see ``tf.summary.FileWriter``. Default is to log nothing.
    log_timestamp : bool, optional
      If True (and ``logdir`` is specified), create a subdirectory of
      ``logdir`` to save the specific run results. The subdirectory's
      name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
    log_vars : list, optional
      Specifies the list of variables to log after each ``n_print``
      steps. If None, will log all variables. If ``[]``, no variables
      will be logged. ``logdir`` must be specified for variables to be
      logged.
    debug : bool, optional
      If True, add checks for ``NaN`` and ``Inf`` to all computations
      in the graph. May result in substantially slower execution
      times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
            self.summarize = tf.summary.merge_all()
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
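A short usage sketch for the logging arguments documented above; `qz_loc` and `qz_scale` are hypothetical variational parameters:

# Write TensorBoard summaries under a fixed directory name and restrict
# logging to two (hypothetical) variables.
inference.initialize(n_iter=1000,
                     logdir='log/my_run',
                     log_timestamp=False,          # no UTC-timestamp subdir
                     log_vars=[qz_loc, qz_scale])  # [] would log nothing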
Example #7
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, metadata = nips(FLAGS.data_dir)
    documents = metadata['columns']
    words = metadata['rows']

    # Subset to documents from 2011 and to words that appear in at least
    # two documents and have a total count of at least 10.
    doc_idx = [
        i for i, document in enumerate(documents)
        if document.startswith('2011')
    ]
    documents = [documents[doc] for doc in doc_idx]
    x_train = x_train[:, doc_idx]
    word_idx = np.logical_and(
        np.sum(x_train != 0, 1) >= 2,
        np.sum(x_train, 1) >= 10)
    words = [word for word, idx in zip(words, word_idx) if idx]
    x_train = x_train[word_idx, :]
    x_train = x_train.T

    N = x_train.shape[0]  # number of documents
    D = x_train.shape[1]  # vocabulary size

    # MODEL
    W2 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[2], FLAGS.K[1]])
    W1 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[1], FLAGS.K[0]])
    W0 = Gamma(0.1, 0.3, sample_shape=[FLAGS.K[0], D])

    z3 = Gamma(0.1, 0.1, sample_shape=[N, FLAGS.K[2]])
    z2 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z3, W2))
    z1 = Gamma(FLAGS.shape, FLAGS.shape / tf.matmul(z2, W1))
    x = Poisson(tf.matmul(z1, W0))

    # INFERENCE
    qW2 = pointmass_q(W2.shape)
    qW1 = pointmass_q(W1.shape)
    qW0 = pointmass_q(W0.shape)
    if FLAGS.q == 'gamma':
        qz3 = gamma_q(z3.shape)
        qz2 = gamma_q(z2.shape)
        qz1 = gamma_q(z1.shape)
    else:
        qz3 = lognormal_q(z3.shape)
        qz2 = lognormal_q(z2.shape)
        qz1 = lognormal_q(z1.shape)

    # We apply variational EM with E-step over local variables
    # and M-step to point estimate the global weight matrices.
    inference_e = ed.KLqp({
        z1: qz1,
        z2: qz2,
        z3: qz3
    },
                          data={
                              x: x_train,
                              W0: qW0,
                              W1: qW1,
                              W2: qW2
                          })
    inference_m = ed.MAP({
        W0: qW0,
        W1: qW1,
        W2: qW2
    },
                         data={
                             x: x_train,
                             z1: qz1,
                             z2: qz2,
                             z3: qz3
                         })

    optimizer_e = tf.train.RMSPropOptimizer(FLAGS.lr)
    optimizer_m = tf.train.RMSPropOptimizer(FLAGS.lr)
    kwargs = {
        'optimizer': optimizer_e,
        'n_print': 100,
        'logdir': FLAGS.logdir,
        'log_timestamp': False
    }
    if FLAGS.q == 'gamma':
        kwargs['n_samples'] = 30
    inference_e.initialize(**kwargs)
    inference_m.initialize(optimizer=optimizer_m)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    n_epoch = 20
    n_iter_per_epoch = 10000
    for epoch in range(n_epoch):
        print("Epoch {}".format(epoch))
        nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            info_dict_e = inference_e.update()
            info_dict_m = inference_m.update()
            nll += info_dict_e['loss']

        # Compute perplexity averaged over a number of training iterations.
        # The model's negative log-likelihood of data is upper bounded by
        # the variational objective.
        nll /= n_iter_per_epoch
        perplexity = np.exp(nll / np.sum(x_train))
        print("Negative log-likelihood <= {:0.3f}".format(nll))
        print("Perplexity <= {:0.3f}".format(perplexity))

        # Print top 10 words for first 10 topics.
        qW0_vals = sess.run(qW0)
        for k in range(10):
            top_words_idx = qW0_vals[k, :].argsort()[-10:][::-1]
            top_words = " ".join([words[i] for i in top_words_idx])
            print("Topic {}: {}".format(k, top_words))
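The `pointmass_q`, `gamma_q`, and `lognormal_q` factories are elided in this example. A sketch of `lognormal_q`, pushing a Normal through an `Exp` bijector (variable names and initializers are assumptions):

import tensorflow as tf
from edward.models import Normal, TransformedDistribution
from tensorflow.contrib.distributions import bijectors

def lognormal_q(shape, name=None):
    # Log-normal variational factor: a Normal pushed through exp.
    with tf.variable_scope(name, default_name="lognormal_q"):
        loc = tf.get_variable("loc", shape)
        scale = tf.get_variable(
            "scale", shape,
            initializer=tf.random_normal_initializer(stddev=0.1))
        return TransformedDistribution(
            distribution=Normal(loc, tf.maximum(tf.nn.softplus(scale), 1e-5)),
            bijector=bijectors.Exp())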
Example #8
def main(_):
    ed.set_seed(42)

    # DATA
    (x_train, _), (x_test, _), (x_valid,
                                _) = caltech101_silhouettes(FLAGS.data_dir)
    x_train_generator = generator(x_train, FLAGS.batch_size)
    x_ph = tf.placeholder(tf.int32, [None, 28 * 28])

    # MODEL
    zs = [0] * len(FLAGS.hidden_sizes)
    for l in reversed(range(len(FLAGS.hidden_sizes))):
        if l == len(FLAGS.hidden_sizes) - 1:
            logits = tf.zeros([tf.shape(x_ph)[0], FLAGS.hidden_sizes[l]])
        else:
            logits = tf.layers.dense(tf.cast(zs[l + 1], tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        zs[l] = Bernoulli(logits=logits)

    x = Bernoulli(logits=tf.layers.dense(
        tf.cast(zs[0], tf.float32), 28 * 28, activation=None))

    # INFERENCE
    # Define variational model with reverse ordering as probability model:
    # if p is 15-100-300 from top-down, q is 300-100-15 from bottom-up.
    qzs = [0] * len(FLAGS.hidden_sizes)
    for l in range(len(FLAGS.hidden_sizes)):
        if l == 0:
            logits = tf.layers.dense(tf.cast(x_ph, tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        else:
            logits = tf.layers.dense(tf.cast(qzs[l - 1], tf.float32),
                                     FLAGS.hidden_sizes[l],
                                     activation=None)
        qzs[l] = Bernoulli(logits=logits)

    inference = ed.KLqp({z: qz for z, qz in zip(zs, qzs)}, data={x: x_ph})
    optimizer = tf.train.AdamOptimizer(FLAGS.step_size)
    inference.initialize(optimizer=optimizer, n_samples=FLAGS.n_train_samples)

    # Build tensor for log-likelihood given one variational sample to run
    # on test data.
    x_post = ed.copy(x, {z: qz for z, qz in zip(zs, qzs)})
    x_neg_log_prob = (-tf.reduce_sum(x_post.log_prob(x_ph)) /
                      tf.cast(tf.shape(x_ph)[0], tf.float32))

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for epoch in range(FLAGS.n_epoch):
        print("Epoch {}".format(epoch))
        train_loss = 0.0

        pbar = Progbar(FLAGS.n_iter_per_epoch)
        for t in range(1, FLAGS.n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(x_train_generator)
            info_dict = inference.update(feed_dict={x_ph: x_batch})
            train_loss += info_dict['loss']

        # Print per-data point loss, averaged over training epoch.
        train_loss /= FLAGS.n_iter_per_epoch
        train_loss /= FLAGS.batch_size
        print("Training negative log-likelihood: {:0.3f}".format(train_loss))

        test_loss = [
            sess.run(x_neg_log_prob, {x_ph: x_test})
            for _ in range(FLAGS.n_test_samples)
        ]
        test_loss = np.mean(test_loss)
        print("Test negative log-likelihood: {:0.3f}".format(test_loss))

        # Prior predictive check.
        images = sess.run(x,
                          {x_ph: x_batch})  # feed ph to determine sample size
        for m in range(FLAGS.batch_size):
            imsave("{}/{}.png".format(FLAGS.out_dir, m),
                   images[m].reshape(28, 28))
Example #9
File: utils.py  Project: qkuang/tf_gbds
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   n_samples=1,
                   kl_scaling=None,
                   maxnorm=5.):

        if kl_scaling is None:
            kl_scaling = {}
        if n_samples <= 0:
            raise ValueError(
                "n_samples should be greater than zero: {}".format(n_samples))

        self.n_samples = n_samples
        self.kl_scaling = kl_scaling

        # from inference.py
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")
        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")
        self.scale = scale

        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:
                            qz_constrained.params = \
                                    z_unconstrained.bijector.inverse(
                                        qz_unconstrained.params)
                        except Exception:  # qz_unconstrained has no params attribute
                            pass
                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        self.reset = [tf.variables_initializer([self.t])]

        # from variational_inference.py
        if var_list is None:
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                var_list.update(get_variables(z, collection=trainables))
                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        clipped_grads_and_vars = []
        for grad, var in grads_and_vars:
            if "kernel" in var.name or "bias" in var.name:
                clipped_grads_and_vars.append((tf.clip_by_norm(grad,
                                                               maxnorm,
                                                               axes=[0]), var))
            else:
                clipped_grads_and_vars.append((grad, var))
        # for grad, var in grads_and_vars:
        #     clipped_grads_and_vars.append(
        #         (tf.clip_by_value(grad, -1000., 1000.), var))
        del grads_and_vars

        if self.logging:
            tf.summary.scalar("loss",
                              self.loss,
                              collections=[self._summary_key])
            for grad, var in clipped_grads_and_vars:
                tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                     grad,
                                     collections=[self._summary_key])
                tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                                  tf.norm(grad),
                                  collections=[self._summary_key])

            self.summarize = tf.summary.merge_all(key=self._summary_key)

        if optimizer is None and global_step is None:
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        with tf.variable_scope(None, default_name="optimizer") as scope:
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                       global_step=global_step)
            else:
                import prettytensor as pt
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=scope.name)))
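Since this project-specific `initialize()` folds optimizer construction and gradient clipping into one call, a hypothetical invocation might look like (argument values are illustrative, assuming the method is mixed into a KLqp-style inference class):

inference.initialize(n_iter=5000,
                     n_samples=5,
                     optimizer='adam',  # resolved by the string branch above
                     maxnorm=5.)        # clip 'kernel'/'bias' grads by norm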
Example #10
def main(_):
    ed.set_seed(42)

    # DATA
    x_train, _, x_test = text8(FLAGS.data_dir)
    vocab = string.ascii_lowercase + ' '
    vocab_size = len(vocab)
    encoder = dict(zip(vocab, range(vocab_size)))
    decoder = {v: k for k, v in encoder.items()}

    data = generator(x_train, FLAGS.batch_size, FLAGS.timesteps, encoder)

    # MODEL
    x_ph = tf.placeholder(tf.int32, [None, FLAGS.timesteps])
    with tf.variable_scope("language_model"):
        # Shift input sequence to right by 1, [0, x[0], ..., x[timesteps - 2]].
        x_ph_shift = tf.pad(x_ph, [[0, 0], [1, 0]])[:, :-1]
        x = language_model(x_ph_shift, vocab_size)

    with tf.variable_scope("language_model", reuse=True):
        x_gen = language_model_gen(5, vocab_size)

    imb = range(0, len(x_test) - FLAGS.timesteps, FLAGS.timesteps)
    encoded_x_test = np.asarray(
        [[encoder[c] for c in x_test[i:(i + FLAGS.timesteps)]] for i in imb],
        dtype=np.int32)
    test_size = encoded_x_test.shape[0]
    print("Test set shape: {}".format(encoded_x_test.shape))
    test_nll = -tf.reduce_sum(x.log_prob(x_ph))

    # INFERENCE
    inference = ed.MAP({}, {x: x_ph})

    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.lr)
    inference.initialize(optimizer=optimizer,
                         logdir=FLAGS.log_dir,
                         log_timestamp=False)

    print("Number of sets of parameters: {}".format(
        len(tf.trainable_variables())))
    print("Number of parameters: {}".format(
        np.sum([np.prod(v.shape.as_list())
                for v in tf.trainable_variables()])))
    for v in tf.trainable_variables():
        print(v)

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    # Double n_epoch and print progress every half an epoch.
    n_iter_per_epoch = len(x_train) // (FLAGS.batch_size * FLAGS.timesteps * 2)
    epoch = 0.0
    for _ in range(FLAGS.n_epoch * 2):
        epoch += 0.5
        print("Epoch: {0}".format(epoch))
        avg_nll = 0.0

        pbar = Progbar(n_iter_per_epoch)
        for t in range(1, n_iter_per_epoch + 1):
            pbar.update(t)
            x_batch = next(data)
            info_dict = inference.update({x_ph: x_batch})
            avg_nll += info_dict['loss']

        # Print average bits per character over epoch.
        avg_nll /= (n_iter_per_epoch * FLAGS.batch_size * FLAGS.timesteps *
                    np.log(2))
        print("Train average bits/char: {:0.8f}".format(avg_nll))

        # Print per-data point log-likelihood on test set.
        avg_nll = 0.0
        for start in range(0, test_size, FLAGS.batch_size):
            end = min(test_size, start + FLAGS.batch_size)
            x_batch = encoded_x_test[start:end]
            avg_nll += sess.run(test_nll, {x_ph: x_batch})

        avg_nll /= test_size
        print("Test average NLL: {:0.8f}".format(avg_nll))

        # Generate samples from model.
        samples = sess.run(x_gen)
        samples = [''.join([decoder[c] for c in sample]) for sample in samples]
        print("Samples:")
        for sample in samples:
            print(sample)
Example #11
### predictive check

n_rep = 100  # number of replicated datasets we generate
holdout_gen = np.zeros((n_rep, x_train.shape[0], x_train.shape[1]))

for i in range(n_rep):
    x_generated = x_post.sample().eval()

    # look only at the heldout entries
    holdout_gen[i] = np.multiply(x_generated, holdout_mask)

n_eval = 10  # we draw samples from the inferred Z and W
obs_ll = []
rep_ll = []
pbar = Progbar(n_eval)
for j in range(n_eval):
    U_sample = U_post.sample().eval()
    V_sample = V_post.sample().eval()

    holdoutmean_sample = np.multiply(U_sample.dot(V_sample.T), holdout_mask)
    obs_ll.append(
        np.mean(np.ma.masked_invalid(
            stats.poisson.logpmf(np.array(x_vad, dtype=int),
                                 holdoutmean_sample)),
                axis=1))

    rep_ll.append(
        np.mean(np.ma.masked_invalid(
            stats.poisson.logpmf(holdout_gen, holdoutmean_sample)),
                axis=2))
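A common way to finish this predictive check is to average the log-likelihoods over the posterior draws and report a posterior predictive p-value per data point; a sketch under the shapes built above (`obs_ll` and `rep_ll` are lists over the `n_eval` draws):

import numpy as np

# Average the per-row log-likelihoods over the n_eval posterior draws.
obs_ll_mean = np.mean(np.stack(obs_ll), axis=0)  # shape: (n_rows,)
rep_ll_mean = np.mean(np.stack(rep_ll), axis=0)  # shape: (n_rep, n_rows)

# Posterior predictive p-value per data point: how often the replicated
# data's heldout log-likelihood falls below the observed one.
pvals = np.mean(rep_ll_mean < obs_ll_mean[np.newaxis, :], axis=0)
print("average predictive p-value: {:0.3f}".format(np.mean(pvals)))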