Exemplo n.º 1
0
def transform(x, *args, **kwargs):
    """Transform a continuous random variable to the unconstrained space.

  Transform selects among a number of defaults transformations which depend
  on the support of the provided random variable.

  Args:
    x : RandomVariable.
      Continuous random variable to transform.
    *args, **kwargs : optional.
      Arguments to overwrite when forming the ``TransformedDistribution``.
      For example, one can manually specify the transformation by
      passing in the ``bijector`` argument.

  Returns:
    RandomVariable.
    A ``TransformedDistribution`` random variable, or the provided random
    variable if no transformation was applied.

  #### Examples

  ```python
  x = Gamma(1.0, 1.0)
  y = ed.transform(x)
  sess = tf.Session()
  sess.run(y)
  -2.2279539
  ```
  """
    if len(args) != 0 or kwargs.get('bijector', None) is not None:
        return TransformedDistribution(x, *args, **kwargs)

    try:
        support = x.support
    except AttributeError as e:
        msg = """'{}' object has no 'support'
             so cannot be transformed.""".format(type(x).__name__)
        raise ValueError(msg)

    if support == '01':
        bij = bijectors.Invert(bijectors.Sigmoid())
    elif support == 'nonnegative':
        bij = bijectors.Invert(bijectors.Softplus())
    elif support == 'simplex':
        bij = bijectors.Invert(bijectors.SoftmaxCentered(event_ndims=1))
    elif support == 'real' or support == 'multivariate_real':
        return x
    else:
        msg = "'transform' does not handle supports of type '{}'".format(
            support)
        raise NotImplementedError(msg)

    return TransformedDistribution(x, bij, *args, **kwargs)
    def test_auto_transform_true(self):
        with self.test_session() as sess:
            # Match normal || softplus-inverse-normal distribution with
            # automated transformation on latter (assuming it is softplus).
            x = TransformedDistribution(
                distribution=Normal(0.0, 0.5),
                bijector=tf.contrib.distributions.bijectors.Softplus())
            x.support = 'nonnegative'
            qx = Normal(loc=tf.Variable(tf.random_normal([])),
                        scale=tf.nn.softplus(tf.Variable(tf.random_normal(
                            []))))

            inference = ed.KLqp({x: qx})
            inference.initialize(auto_transform=True, n_samples=5, n_iter=1000)
            tf.global_variables_initializer().run()
            for _ in range(inference.n_iter):
                info_dict = inference.update()

            # Check approximation on constrained space has same moments as
            # target distribution.
            n_samples = 10000
            x_mean, x_var = tf.nn.moments(x.sample(n_samples), 0)
            x_unconstrained = inference.transformations[x]
            qx_constrained = transform(
                qx, bijectors.Invert(x_unconstrained.bijector))
            qx_mean, qx_var = tf.nn.moments(qx_constrained.sample(n_samples),
                                            0)
            stats = sess.run([x_mean, qx_mean, x_var, qx_var])
            self.assertAllClose(info_dict['loss'], 0.0, rtol=0.2, atol=0.2)
            self.assertAllClose(stats[0], stats[1], rtol=1e-1, atol=1e-1)
            self.assertAllClose(stats[2], stats[3], rtol=1e-1, atol=1e-1)
Exemplo n.º 3
0
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False):
        """Initialize inference algorithm. It initializes hyperparameters
    and builds ops for the algorithm's computation graph.

    Any derived class of `Inference` **must** implement this method.
    No methods which build ops should be called outside `initialize()`.

    Args:
      n_iter: int, optional.
        Number of iterations for algorithm when calling `run()`.
        Alternatively if controlling inference manually, it is the
        expected number of calls to `update()`; this number determines
        tracking information during the print progress.
      n_print: int, optional.
        Number of iterations for each print progress. To suppress print
        progress, then specify 0. Default is `int(n_iter / 100)`.
      scale: dict of RandomVariable to tf.Tensor, optional.
        A tensor to scale computation for any random variable that it is
        binded to. Its shape must be broadcastable; it is multiplied
        element-wise to the random variable. For example, this is useful
        for mini-batch scaling when inferring global variables, or
        applying masks on a random variable.
      auto_transform: bool, optional.
        Whether to automatically transform continuous latent variables
        of unequal support to be on the unconstrained space. It is
        only applied if the argument is `True`, the latent variable
        pair are `ed.RandomVariable`s with the `support` attribute,
        the supports are both continuous and unequal.
      logdir: str, optional.
        Directory where event file will be written. For details,
        see `tf.summary.FileWriter`. Default is to log nothing.
      log_timestamp: bool, optional.
        If True (and `logdir` is specified), create a subdirectory of
        `logdir` to save the specific run results. The subdirectory's
        name is the current UTC timestamp with format 'YYYYMMDD_HHMMSS'.
      log_vars: list, optional.
        Specifies the list of variables to log after each `n_print`
        steps. If None, will log all variables. If `[]`, no variables
        will be logged. `logdir` must be specified for variables to be
        logged.
      debug: bool, optional.
        If True, add checks for `NaN` and `Inf` to all computations
        in the graph. May result in substantially slower execution
        times.
    """
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")

        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")

        self.scale = scale

        # map from original latent vars to unconstrained versions
        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            # latent_vars maps original latent vars to constrained Q's.
            # latent_vars_unconstrained maps unconstrained vars to unconstrained Q's.
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    # transform z to an unconstrained space
                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    # make sure we also have a qz that covers the unconstrained space
                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    # additionally construct the transformation of qz
                    # back into the original constrained space
                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:  # attempt to pushforward the params of Empirical distributions
                            qz_constrained.params = z_unconstrained.bijector.inverse(
                                qz_unconstrained.params)
                        except:  # qz_unconstrained is not an Empirical distribution
                            pass

                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        # Store reset ops which user can call. Subclasses should append
        # any ops needed to reset internal variables in inference.
        self.reset = [tf.variables_initializer([self.t])]
Exemplo n.º 4
0
    def initialize(self,
                   n_iter=1000,
                   n_print=None,
                   scale=None,
                   auto_transform=True,
                   logdir=None,
                   log_timestamp=True,
                   log_vars=None,
                   debug=False,
                   optimizer=None,
                   var_list=None,
                   use_prettytensor=False,
                   global_step=None,
                   n_samples=1,
                   kl_scaling=None,
                   maxnorm=5.):

        if kl_scaling is None:
            kl_scaling = {}
        if n_samples <= 0:
            raise ValueError(
                "n_samples should be greater than zero: {}".format(n_samples))

        self.n_samples = n_samples
        self.kl_scaling = kl_scaling

        # from inference.py
        self.n_iter = n_iter
        if n_print is None:
            self.n_print = int(n_iter / 100)
        else:
            self.n_print = n_print

        self.progbar = Progbar(self.n_iter)
        self.t = tf.Variable(0, trainable=False, name="iteration")
        self.increment_t = self.t.assign_add(1)

        if scale is None:
            scale = {}
        elif not isinstance(scale, dict):
            raise TypeError("scale must be a dict object.")
        self.scale = scale

        self.transformations = {}
        if auto_transform:
            latent_vars = self.latent_vars.copy()
            self.latent_vars = {}
            self.latent_vars_unconstrained = {}
            for z, qz in six.iteritems(latent_vars):
                if hasattr(z, 'support') and hasattr(qz, 'support') and \
                        z.support != qz.support and qz.support != 'point':

                    z_unconstrained = transform(z)
                    self.transformations[z] = z_unconstrained

                    if qz.support == "points":
                        qz_unconstrained = qz
                    else:
                        qz_unconstrained = transform(qz)
                    self.latent_vars_unconstrained[
                        z_unconstrained] = qz_unconstrained

                    if z_unconstrained != z:
                        qz_constrained = transform(
                            qz_unconstrained,
                            bijectors.Invert(z_unconstrained.bijector))

                        try:
                            qz_constrained.params = \
                                    z_unconstrained.bijector.inverse(
                                        qz_unconstrained.params)
                        except:
                            pass
                    else:
                        qz_constrained = qz_unconstrained

                    self.latent_vars[z] = qz_constrained
                else:
                    self.latent_vars[z] = qz
                    self.latent_vars_unconstrained[z] = qz
            del latent_vars

        if logdir is not None:
            self.logging = True
            if log_timestamp:
                logdir = os.path.expanduser(logdir)
                logdir = os.path.join(
                    logdir,
                    datetime.strftime(datetime.utcnow(), "%Y%m%d_%H%M%S"))

            self._summary_key = tf.get_default_graph().unique_name("summaries")
            self._set_log_variables(log_vars)
            self.train_writer = tf.summary.FileWriter(logdir,
                                                      tf.get_default_graph())
        else:
            self.logging = False

        self.debug = debug
        if self.debug:
            self.op_check = tf.add_check_numerics_ops()

        self.reset = [tf.variables_initializer([self.t])]

        # from variational_inference.py
        if var_list is None:
            var_list = set()
            trainables = tf.trainable_variables()
            for z, qz in six.iteritems(self.latent_vars):
                var_list.update(get_variables(z, collection=trainables))
                var_list.update(get_variables(qz, collection=trainables))

            for x, qx in six.iteritems(self.data):
                if isinstance(x, RandomVariable) and \
                        not isinstance(qx, RandomVariable):
                    var_list.update(get_variables(x, collection=trainables))

        var_list = list(var_list)

        self.loss, grads_and_vars = self.build_loss_and_gradients(var_list)

        clipped_grads_and_vars = []
        for grad, var in grads_and_vars:
            if "kernel" in var.name or "bias" in var.name:
                clipped_grads_and_vars.append((tf.clip_by_norm(grad,
                                                               maxnorm,
                                                               axes=[0]), var))
            else:
                clipped_grads_and_vars.append((grad, var))
        # for grad, var in grads_and_vars:
        #     clipped_grads_and_vars.append(
        #         (tf.clip_by_value(grad, -1000., 1000.), var))
        del grads_and_vars

        if self.logging:
            tf.summary.scalar("loss",
                              self.loss,
                              collections=[self._summary_key])
        for grad, var in clipped_grads_and_vars:
            tf.summary.histogram("gradient/" + var.name.replace(':', '/'),
                                 grad,
                                 collections=[self._summary_key])
            tf.summary.scalar("gradient_norm/" + var.name.replace(':', '/'),
                              tf.norm(grad),
                              collections=[self._summary_key])

        self.summarize = tf.summary.merge_all(key=self._summary_key)

        if optimizer is None and global_step is None:
            global_step = tf.Variable(0, trainable=False, name="global_step")

        if isinstance(global_step, tf.Variable):
            starter_learning_rate = 0.1
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step,
                                                       100,
                                                       0.9,
                                                       staircase=True)
        else:
            learning_rate = 0.01

        # Build optimizer.
        if optimizer is None:
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif isinstance(optimizer, str):
            if optimizer == 'gradientdescent':
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            elif optimizer == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate)
            elif optimizer == 'adagrad':
                optimizer = tf.train.AdagradOptimizer(learning_rate)
            elif optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            elif optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif optimizer == 'ftrl':
                optimizer = tf.train.FtrlOptimizer(learning_rate)
            elif optimizer == 'rmsprop':
                optimizer = tf.train.RMSPropOptimizer(learning_rate)
            else:
                raise ValueError('Optimizer class not found:', optimizer)
        elif not isinstance(optimizer, tf.train.Optimizer):
            raise TypeError(
                "Optimizer must be str, tf.train.Optimizer, or None.")

        with tf.variable_scope(None, default_name="optimizer") as scope:
            if not use_prettytensor:
                self.train = optimizer.apply_gradients(clipped_grads_and_vars,
                                                       global_step=global_step)
            else:
                import prettytensor as pt
                self.train = pt.apply_optimizer(optimizer,
                                                losses=[self.loss],
                                                global_step=global_step,
                                                var_list=var_list)

        self.reset.append(
            tf.variables_initializer(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope=scope.name)))
                 probs=p,
                 concentrations=conc_param *
                 tf.ones(n_samples, dtype=np.float32),
                 sample_shape=n_samples,
                 value=tf.zeros(n_samples, dtype="float32"))

# INFERENCE
#qp = edward.models.BetaWithSoftplusConcentration(tf.Variable(1.), tf.Variable(1.))
qp = ed.models.Normal(loc=tf.get_variable("qp/loc", []),
                      scale=tf.nn.softplus(tf.get_variable("qp/scale", [])))
qconc = ed.models.Normal(loc=tf.get_variable("qconc/loc", []),
                         scale=tf.nn.softplus(
                             tf.get_variable("qconc/scale", [])))

inference = ed.KLqp({p: qp, conc_param: qconc}, data={x: x_data})
inference.run()

# PRINT RESULTS
qp_samples = ed.transform(
    qp, bijectors.Invert(
        inference.transformations[p].bijector)).sample(100).eval()

print("True prob success: {:.2f}, inferred {:.3f} +- {:.2f}".format(
    p_true, qp_samples.mean(), np.sqrt(qp_samples.var())))

qconc_samples = ed.transform(
    qconc, bijectors.Invert(
        inference.transformations[conc_param].bijector)).sample(100).eval()

print("True concentration: {:.2f}, Inferred: {:.3f} +- {:.2f}".format(
    true_conc, qconc_samples.mean(), np.sqrt(qconc_samples.var())))