Example #1
    def sample_trajectories(self, initial_points: tf.Tensor,
                            sim_steps: int) -> tf.Tensor:
        """
        Simulate trajectories starting at initial points (one trajectory for each initial point) for 'sim_steps'
        simulation steps

        :param initial_points: a Matrix (nb_different_initial_points x dimension)
        :param sim_steps: number of simulation steps
        :return: SDE trajectories, one for each different initial point
        """
        # assert initial_points.ndim == 2, "initial_points is not a matrix"
        diffusions = tfp.distributions.MultivariateNormalDiag(
            tf.zeros(initial_points.shape, dtype=tf_floatx()),
            tf.sqrt(self.diffusion.expected_diffusion()),
        ).sample(sim_steps)  # Distribution.sample does not accept a dtype argument
        trajectories = tf.scan(
            lambda x_tm1, noise_term:
            x_tm1 + self.drift_svgp.predict_f(x_tm1)[0] + noise_term,
            elems=diffusions,
            initializer=initial_points,
            name='sde_sim')
        # transpose so that output has shape (initial_points.shape[0], sim_steps + 1, input_dim)
        return tf.concat(
            [initial_points[:, tf.newaxis, :],
             tf.transpose(trajectories, [1, 0, 2])],
            axis=1)
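A minimal usage sketch (hypothetical names, assuming a trained SdeModel instance `sde_model` over a two-dimensional phase space):

    x0 = tf.constant([[0.0, 0.0], [1.0, -1.0]], dtype=tf_floatx())  # two initial points
    paths = sde_model.sample_trajectories(x0, sim_steps=500)
    # paths.shape == (2, 501, 2): (initial point, time step including t=0, dimension)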
Example #2
    def _encode_and_decode(self,
                           y_input,
                           training=None,
                           initial_state=None,
                           use_mask=False):
        """
        :param y_input:
        :param training:
        :param initial_state: (encoder_initial_state,  initial_dynamic_mean, initial_dynamic_prec)
        :return:
        """
        encoder_initial_state, x0_stats = self._unzip_initial_state(
            initial_state)

        encoded_means, encoded_scales, encoder_states = self.encoder(
            y_input, training=training, initial_state=encoder_initial_state)
        initial_dynamic_mean, initial_dynamic_scale = self._handle_x0_stats(
            x0_stats, (encoded_means, encoded_scales))

        # encoded_covs = tfd.MultivariateNormalDiag(scale_diag=encoded_scales).covariance()
        samples, entropies, sampling_distro, final_mean, final_prec = (
            self.mpa.forward_backward(
                # encoded_means, encoded_covs, initial_dynamic_mean, tf.map_fn(lambda x: tf.linalg.diag(1 / x ** 2), initial_dynamic_scale)
                (initial_dynamic_mean, initial_dynamic_scale),
                (encoded_means, encoded_scales)))
        decoded_means, decoded_scales_diag = tf.map_fn(
            lambda x_: self.decoder(x_, training=training),
            samples,
            dtype=(tf_floatx(), tf_floatx()))
        return ((samples, entropies, sampling_distro), (encoded_means,
                                                        encoded_scales),
                (decoded_means, decoded_scales_diag), (encoder_states,
                                                       final_mean, final_prec))
Example #3
    def __init__(self,
                 kernel: gpflow.kernels.SeparateIndependent,
                 inducing_points: tf.Tensor,
                 num_latent: int,
                 prior_scale=1.0):
        self.nb_inducing_variables = int(inducing_points.shape[0])
        inducing_variables = gpflow.inducing_variables.SharedIndependentInducingVariables(
            gpflow.inducing_variables.InducingPoints(
                tf.Variable(inducing_points, name='inducing_points')))
        super().__init__(kernel,
                         gpflow.likelihoods.Gaussian(),
                         inducing_variable=inducing_variables,
                         num_latent_gps=num_latent)

        self._q_mu_0 = self.q_mu
        self._q_sqrt_0 = self.q_sqrt

        self._vague_prior = np.any(np.isinf(prior_scale))
        if not self._vague_prior:
            self.prior_distribution = tfd.MultivariateNormalDiag(
                tf.zeros_like(tf.transpose(self.q_mu), dtype=tf_floatx()),
                tf.repeat(
                    tf.convert_to_tensor(prior_scale, dtype=tf_floatx())[..., tf.newaxis],
                    self.q_mu.shape[0],
                    axis=-1))
Example #4
    def __init__(self, diff_parameters: DiffParameters):
        super(Diffusion, self).__init__()

        def get_bijector():
            if gpflow.config.default_positive_bijector() == 'exp':
                return tfp.bijectors.Exp()
            elif gpflow.config.default_positive_bijector() == 'softplus':
                return tfp.bijectors.Softplus()
            else:
                raise ValueError(
                    "Unexpected value in default_positive_bijector()")

        assert len(diff_parameters.alphas) == len(
            diff_parameters.betas), "len(alphas) != len(betas)"
        self.dimension = len(diff_parameters.alphas)
        alphas = diff_parameters.alphas
        betas = diff_parameters.betas
        # NOTE: both Parameters are initialized to ones rather than to the provided
        # alphas/betas (see TODO); the provided values only define the prior below.
        self._alphas = gpflow.Parameter(
            tf.ones_like(alphas, dtype=tf_floatx()),
            transform=get_bijector(),
            name='alphas')
        self._betas = gpflow.Parameter(
            # TODO: initialize from the provided betas?
            tf.ones_like(betas, dtype=tf_floatx()),
            transform=get_bijector(),
            name='betas')
        self.prior_distribution = tfd.Gamma(alphas, betas)
Example #5
 def __init__(self, initial_learning_rate, maximum_learning_rate, growth_steps,
              midpoint, name=None):
     super(SigmoidScheduler, self).__init__()
     self.initial_learning_rate = tf.convert_to_tensor(initial_learning_rate, dtype=tf_floatx())
     self.maximum_learning_rate = tf.convert_to_tensor(maximum_learning_rate, dtype=tf_floatx())
     self.delta = self.maximum_learning_rate - self.initial_learning_rate
     self.growth_steps = tf.convert_to_tensor(growth_steps, dtype=tf_floatx())
     self.midpoint = tf.convert_to_tensor(midpoint, dtype=tf_floatx())
     self.name = name
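Only the constructor is shown; a plausible `__call__` (an assumption, not present in the source) would ramp the rate along a logistic curve centred at `midpoint`:

     def __call__(self, step):
         # Hypothetical sketch: logistic growth from the initial to the maximum rate
         step = tf.cast(step, tf_floatx())
         return self.initial_learning_rate + self.delta * tf.math.sigmoid(
             (step - self.midpoint) / self.growth_steps)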
Example #6
def draw_fast_samples(vae, initial_state, x_chunk):
    rnn_state, mean0, scale0 = vae._handle_x0_state(initial_state, x_chunk)
    mean, scales, states = vae.encoder(x_chunk,
                                       training=True,
                                       initial_state=rnn_state)
    distro = tfd.MultivariateNormalDiag(mean, scales)
    samples = distro.sample(vae.mpa.nb_samples)
    covs = tfp.stats.covariance(tf.reshape(samples, (*samples.shape[:2], -1)),
                                sample_axis=0)
    # Add diagonal jitter so that the log-determinant below is numerically stable
    covs = covs + 1e-8 * tf.eye(
        covs.shape[1], batch_shape=[covs.shape[0]], dtype=tf_floatx())
    # Gaussian entropy: 0.5 * D * (1 + log(2 * pi)) + 0.5 * logdet(cov)
    entropies = 0.5 * covs.shape[1] * (1 + tf.math.log(2 * tf.constant(
        np.pi, dtype=tf_floatx()))) + 0.5 * tf.linalg.logdet(covs)
    return samples, entropies, (mean, scales), (mean0, scale0), states
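For reference, the entropy above matches TFP's closed form; a quick sanity-check sketch:

    ref_entropies = tfd.MultivariateNormalTriL(
        scale_tril=tf.linalg.cholesky(covs)).entropy()  # equals `entropies` up to float error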
Example #7
    def automatic_sde_nat_grads(self,
                                y_input,
                                y_target,
                                samples,
                                entropies,
                                encoded_dist,
                                decoded_dist,
                                initial_state,
                                effective_nb_timesteps=None,
                                kl_weight=tf.convert_to_tensor(
                                    1.0, dtype=tf_floatx())):
        alphas = self.sde_model.diffusion._alphas
        betas = self.sde_model.diffusion._betas
        q_mu = self.sde_model.drift_svgp.q_mu
        q_sqrt = self.sde_model.drift_svgp.q_sqrt
        # `variational_vars` avoids shadowing the builtin vars()
        variational_vars = [
            alphas.unconstrained_variable, betas.unconstrained_variable,
            q_mu.unconstrained_variable, q_sqrt.unconstrained_variable
        ]
        with tf.GradientTape(persistent=True,
                             watch_accessed_variables=False) as tape:
            tape.watch(variational_vars)
            expectations = self.sde_model.expectation_params()
            xis = self.sde_model.expectation_to_xi(expectations)
            loss = self._loss(y_input, y_target, samples, entropies,
                              encoded_dist, decoded_dist, initial_state,
                              effective_nb_timesteps, kl_weight)

        dL_dxi = tape.gradient(loss, variational_vars)
        # Chain rule: backpropagating dL/dxi through xi(expectations) via
        # output_gradients yields the natural gradients with respect to the
        # expectation parameters, without building the Fisher matrix explicitly.
        natural_gradients = tape.gradient(xis,
                                          expectations,
                                          output_gradients=dL_dxi)
        del tape
        return natural_gradients
Example #8
def optimize_sde_standard_grad(y_input,
                               y_target,
                               gm: VAE,
                               optimizer,
                               initial_state=None,
                               effective_nb_timesteps=None,
                               kl_weight=tf.convert_to_tensor(
                                   1.0, dtype=tf_floatx()),
                               clip_value=100.):
    vvars = gm.sde_model.variational_variables
    (samples, entropies,
     _), encoded_dist, decoded_dist, final_state = gm._encode_and_decode(
         y_input, training=False, initial_state=initial_state)
    with tf.GradientTape(persistent=False,
                         watch_accessed_variables=False) as tape:
        tape.watch(vvars)
        breaked_loss = gm._breaked_loss(y_input, y_target, samples, entropies,
                                        encoded_dist, decoded_dist,
                                        initial_state, effective_nb_timesteps,
                                        kl_weight)
        loss = tf.reduce_sum(breaked_loss)
        # loss = gm.loss(y_input, y_target, training=True)
    vgrads = tape.gradient(loss, vvars)
    optimizer.apply_gradients(zip(vgrads, vvars))
    return loss, breaked_loss, final_state
Example #9
 def kullback_leibler_by_dimension(self,
                                   free_bits=tf.convert_to_tensor(
                                       0, dtype=tf_floatx())):
     # TODO: do not permit non-whitened representations!
     if self.drift_svgp._vague_prior:
         # The Kullback-Leibler divergence when using a vague prior, up to an
         # additive constant (which is, in theory, infinite)
         q_sqrt = self.drift_svgp.q_sqrt
         gaussian_kl = -0.5 * q_sqrt.shape[-1] - tf.reduce_sum(
             tf.math.log(tf.linalg.diag_part(q_sqrt)), 1)
         # We do not apply the free-bits technique with the vague prior, since the
         # gaussian_kl already provides infinite free bits!
         kl = (gaussian_kl + tfd.Gamma(
             self.diffusion.alphas(), self.diffusion.betas()).kl_divergence(
                 self.diffusion.prior_distribution))
     else:
         kl_mu = tf.transpose(
             tf.sqrt(self.diffusion.expected_precision())[tf.newaxis, ...] *
             self.drift_svgp.q_mu)
         q_sqrt = self.drift_svgp.q_sqrt
         kl = (tfd.MultivariateNormalTriL(kl_mu, q_sqrt).kl_divergence(
             self.drift_svgp.prior_distribution) +
               tfd.Gamma(self.diffusion.alphas(),
                         self.diffusion.betas()).kl_divergence(
                             self.diffusion.prior_distribution))
         kl = tf.math.maximum(free_bits, kl)
     return kl
Example #10
    def __init__(self, initial_learning_rate, maximum_learning_rate, growth_steps, name=None):
        super(NatGradScheduler, self).__init__()
        self.initial_learning_rate = tf.convert_to_tensor(initial_learning_rate, dtype=tf_floatx())
        self.maximum_learning_rate = tf.convert_to_tensor(maximum_learning_rate, dtype=tf_floatx())
        self.growth_steps = tf.convert_to_tensor(growth_steps, dtype=tf_floatx())
        self.name = name

        self._log_initial_learning_rate = tf.math.log(self.initial_learning_rate)
        self._log_maximum_learning_rate = tf.math.log(self.maximum_learning_rate)
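Again only the constructor appears; the stored log-rates suggest a geometric ramp. A hypothetical `__call__` consistent with these fields (an assumption, not shown in the source):

    def __call__(self, step):
        # Hypothetical sketch: interpolate in log-space, clamping at the maximum rate
        frac = tf.minimum(tf.cast(step, tf_floatx()) / self.growth_steps, 1.0)
        return tf.exp(self._log_initial_learning_rate + frac *
                      (self._log_maximum_learning_rate - self._log_initial_learning_rate))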
Example #11
def tf_pairwise_distance(feature, squared: bool = False):
    """Computes the pairwise distance matrix with numerical stability.
    output[i, j] = || feature[i, :] - feature[j, :] ||_2
    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.
    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    pairwise_distances_squared = tf.math.add(
        tf.math.reduce_sum(tf.math.square(feature), axis=[1], keepdims=True),
        tf.math.reduce_sum(
            tf.math.square(tf.transpose(feature)), axis=[0], keepdims=True
        ),
    ) - 2.0 * tf.matmul(feature, tf.transpose(feature))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = tf.math.maximum(pairwise_distances_squared, 0.0)
    # Get the mask where the zero distances are at.
    error_mask = tf.math.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        pairwise_distances = tf.math.sqrt(
            # Cast with tf_floatx() (not float32) so float64 runs do not mix dtypes
            pairwise_distances_squared
            + tf.cast(error_mask, dtype=tf_floatx()) * 1e-16
        )

    # Undo conditionally adding 1e-16.
    pairwise_distances = tf.math.multiply(
        pairwise_distances,
        tf.cast(tf.math.logical_not(error_mask), dtype=tf_floatx()),
    )

    num_data = tf.shape(feature)[0]
    # Explicitly set diagonals to zero.
    mask_offdiagonals = tf.ones_like(pairwise_distances) - tf.linalg.diag(
        tf.ones([num_data], dtype=tf_floatx())
    )
    pairwise_distances = tf.math.multiply(pairwise_distances, mask_offdiagonals)
    return pairwise_distances
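A small usage example:

    feature = tf.constant([[0.0, 0.0], [3.0, 4.0]], dtype=tf_floatx())
    tf_pairwise_distance(feature)                 # [[0., 5.], [5., 0.]]
    tf_pairwise_distance(feature, squared=True)   # [[0., 25.], [25., 0.]]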
Example #12
 def forward_step(particles, log_weights, encoding_potential):
     particles = self.proposal_builder(particles,
                                       encoding_potential).sample()
     log_weights = self.weight_fn(log_weights, particles,
                                  encoding_potential)
     log_weights = log_weights - tf.math.reduce_logsumexp(
         log_weights, axis=-1, keepdims=True)
     # Compute effective sample size and entropy of weighting vector.
     # These are useful statistics for adaptive particle filtering.
     weights = tf.exp(log_weights)
     n_eff = 1.0 / tf.reduce_sum(tf.square(weights), axis=1)
     # Resampling (systematic resampling) step, applied per batch element only
     # when the effective sample size falls below the threshold
     particles, log_weights = tf.map_fn(
         lambda x: tf.cond(
             x[0] < self.n_eff_threshold, lambda: forward_resampling(
                 x[1], x[2], self.alpha), lambda: (x[1], x[2])),
         # map_fn iterates over the leading (batch) dimension of elems
         elems=(n_eff, particles, log_weights),
         dtype=(tf_floatx(), tf_floatx()))
     return particles, log_weights
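A toy check of the effective-sample-size criterion used above (values chosen by hand):

    log_w = tf.math.log(tf.constant([[0.7, 0.1, 0.1, 0.1]], dtype=tf_floatx()))
    log_w = log_w - tf.math.reduce_logsumexp(log_w, axis=-1, keepdims=True)
    n_eff = 1.0 / tf.reduce_sum(tf.square(tf.exp(log_w)), axis=1)
    # n_eff ≈ [1.92], well below 4 particles, so this batch element would be resampled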
Example #13
 def synthetize(self, y_input, y_target, simulation_steps):
     samples, encoded_dist, decoded_dist, loss, states = self.__call__(
         y_input,
         y_target,
         training=False,
         initial_state=None,
         effective_nb_timesteps=tf.convert_to_tensor(1.0,
                                                     dtype=tf_floatx()))
     # Use the last time step of the first drawn sample of each batch element
     # as the starting point of the simulation
     initial_points = samples[0, :, -1, :]
     predicted_samples = self.sde_model.sample_trajectories(
         initial_points, simulation_steps)
     return self.decoder(predicted_samples), predicted_samples
Example #14
 def _parse_dataset(self, folder, scaling_function, batch_size, shuff_buffer_size=0):
     """creates a batched Dataset object. If shuff_buffer_size > 0, shuffling is used"""
     files = glob.glob(os.path.join(folder, "*.tfrecords"))
     nb_files = len(files)
     if nb_files >= batch_size:
         tfdata = tf.data.TFRecordDataset(files)
         # parse the protobuff to a dictionary with the 'signal' feature
         parsed_dataset = tfdata.map(
             lambda proto: tf.io.parse_single_example(proto, {
                 'signal': tf.io.FixedLenFeature([self.example_timesteps], tf.float32,
                                                 default_value=np.zeros(self.example_timesteps))
             })
         )
         # Extract the 'signal' feature from the dictionary to get a tensor representing the signal as
         # a vector of length (len_tbptt, ). We add an extra dimension for consistency with the expected
         # input of RNNs, in which the latest dimension is the dimension of the feature space.
         parsed_dataset = parsed_dataset.map(
             lambda x: x['signal'][..., tf.newaxis],
             num_parallel_calls=tf.data.experimental.AUTOTUNE
         )
         if tf_floatx() != tf.float32:
             parsed_dataset = parsed_dataset.map(
                 lambda x: tf.cast(x, tf_floatx()),
                 num_parallel_calls=tf.data.experimental.AUTOTUNE
             )
         if scaling_function is not None:
             parsed_dataset = parsed_dataset.map(
                 lambda x: scaling_function(x),
                 num_parallel_calls=tf.data.experimental.AUTOTUNE
             )
         parsed_dataset = parsed_dataset.cache() #TODO
         if shuff_buffer_size > 0:
             parsed_dataset = parsed_dataset.shuffle(shuff_buffer_size)
         parsed_dataset = parsed_dataset.batch(batch_size, drop_remainder=True)
         parsed_dataset = parsed_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # TODO
     else:
         warnings.warn(f"Insufficient number of files in {folder} to create batches of size {batch_size} (returning None)")
         parsed_dataset = None
     return parsed_dataset
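A hypothetical usage sketch (`trainer` stands for the owning object, which must expose `example_timesteps`; the folder is assumed to contain *.tfrecords files):

    ds = trainer._parse_dataset('data/train', scaling_function=None,
                                batch_size=16, shuff_buffer_size=1024)
    if ds is not None:
        for batch in ds:  # each batch has shape (16, trainer.example_timesteps, 1)
            ...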
Example #15
    def __init__(self,
                 input_dimension: int,
                 output_dimension: int,
                 len_tbptt: int,
                 encoder_type,
                 encoder_hidden_units,
                 encoder_kernel_size,
                 encoder_dilation_rate,
                 phase_space_dimension: int,
                 drift_parameters: DriftParameters,
                 diff_parameters: DiffParameters,
                 pseudo_inputs: np.ndarray,
                 nb_samples: int,
                 initial_prec=INITIAL_PREC):  # TODO: initial_prec
        self.phase_space_dim = phase_space_dimension
        if encoder_type == 'rnn':
            self.encoder = RnnEncoder(encoder_hidden_units,
                                      output_dim=phase_space_dimension)
        # TODO
        # elif encoder_type == 'cnn':
        #     self.encoder = Encoder(encoder_hidden_units, kernel_size=encoder_kernel_size,
        #                            dilation_rate=encoder_dilation_rate, output_dim=phase_space_dimension)
        else:
            raise ValueError('Invalid encoder type')
        self.decoder = Decoder(output_dimension)
        self.sde_model = SdeModel(
            tf.convert_to_tensor(pseudo_inputs, dtype=tf_floatx()),
            drift_parameters, diff_parameters)
        # Use -Inf in case an improper prior is used
        self.minimum_nats = tf.convert_to_tensor(-np.Inf, dtype=tf_floatx())
        # Since we are using TBPTT, we permit the gm to have a 'state', in the sense that the last
        # means and precs of the current chunk can be propagated to the next chunk as the initial
        # means and precs. We first set the values for state_0 (t=0) and then create two variables
        # for tracking the state.

        self.x0_prior = tfd.MultivariateNormalDiag(
            scale_diag=tf.ones(self.phase_space_dim, dtype=tf_floatx()))
        # TODO: permit modifying the filtering method without changing this
        assert nb_samples > 1, 'nb_samples should be > 1'
        self.mpa = VaeleParticleFilter(self.sde_model, nb_samples)
Example #16
 def __init__(self,
              proposal_builder,
              dynamics_fn,
              weight_fn,
              nb_samples,
              n_particles=100,
              n_eff_threshold=None):
     self.nb_samples = nb_samples
     self.n_particles = n_particles
     self.n_eff_threshold = n_eff_threshold or n_particles // 2
     self.proposal_builder = proposal_builder
     self.dynamics_fn = dynamics_fn
     self.weight_fn = weight_fn
     self.alpha = tf.constant(0.99, dtype=tf_floatx())
Example #17
 def _init_filter(self, init_stats, init_potentials, batch_size):
     # Product of diagonal Gaussians. NOTE: the actual product (see the commented
     # formulas below) is currently disabled; the x-statistics are preferred when present.
     mean_x, scale_x = init_stats
     mean_y, scale_y = init_potentials
     if mean_x is None or scale_x is None:
         mean_out, scale_out = mean_y, scale_y
     else:
         mean_out, scale_out = mean_x, scale_x
     # var_xy = 1 / (1 / var_x + 1 / var_y)
     # mean_xy = ((mean_y / var_y) + (mean_x / var_x)) * var_xy
     distro = tfd.MultivariateNormalDiag(mean_out, scale_out)
     return (tf.transpose(distro.sample(self.n_particles), [1, 0, 2]),
             tf.ones((batch_size, self.n_particles), dtype=tf_floatx()) /
             self.n_particles)
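For reference, a minimal sketch of the disabled product-of-Gaussians combination implied by the commented formulas (hypothetical helper; diagonal covariances parameterized by scales):

    def _product_of_diag_gaussians(mean_x, scale_x, mean_y, scale_y):
        prec_x, prec_y = 1.0 / scale_x ** 2, 1.0 / scale_y ** 2
        var_xy = 1.0 / (prec_x + prec_y)
        mean_xy = (mean_x * prec_x + mean_y * prec_y) * var_xy
        return mean_xy, tf.sqrt(var_xy)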
Example #18
 def _loss(self,
           y_input,
           y_target,
           samples,
           entropies,
           encoded_dist,
           decoded_dist,
           initial_state,
           effective_nb_timesteps,
           kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx())):
     return tf.reduce_sum(
         self._breaked_loss(y_input, y_target, samples, entropies,
                            encoded_dist, decoded_dist, initial_state,
                            effective_nb_timesteps, kl_weight))
Example #19
 def __call__(self,
              y_input,
              y_target,
              training=None,
              initial_state=None,
              effective_nb_timesteps=None,
              kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx())):
     (samples, entropies,
      _), encoded_dist, decoded_dist, states = self._encode_and_decode(
          y_input, training=training, initial_state=initial_state)
     loss = self._loss(y_input, y_target, samples, entropies, encoded_dist,
                       decoded_dist, initial_state, effective_nb_timesteps,
                       kl_weight)
     return samples, encoded_dist, decoded_dist, loss, states
Example #20
    def _forward_step(self, message_tm1, encoding_potentials_t):
        """
        :param message_tm1: The filtered means (N x D), covariances and precisions (N x D x D) of
         x_tm1|y_1:tm1. N is due to the batchs; D is the dimension of the embedding space.
        :param encoding_potentials_t: The gaussian potentials relating x_t with y_t. Again, there is a
        batched dimension.
        :return: the filtered distribution x_t|y_1:t (means, covariances and precisions), the predicted
        distribution x_t|y_1:tm1, and the covariance matrices cov(x_t, x_tm1| y_1:tm1).
        """
        # Ignore the predicted distribution and the conditional covariance from previous time step
        (means_tm1, covs_tm1, precs_tm1), _, _ = message_tm1
        encoding_means_t, encoding_precs_t = encoding_potentials_t

        # means_tm1 is N x D (N is the batch dimension). Turn each vector (D,) into a
        # row vector (1, D) by adding a new axis
        expanded_means_tm1 = means_tm1[:, tf.newaxis, :]
        means_t_given_tm1, covs_t_t_given_tm1, precs_t_t_given_tm1, covs_t_tm1_given_tm1 = (
            tf.map_fn(
                lambda x: self.predict_xt_given_tm1(mean_tm1=x[0], cov_tm1=x[1]),
                elems=(expanded_means_tm1, covs_tm1),
                # predict_xt_given_tm1 returns
                # mean_t_given_tm1, cov_t_t_given_tm1, tf.linalg.inv(cov_t_t_given_tm1), cov_t_tm1_given_tm1
                # Hence:
                dtype=(tf_floatx(), tf_floatx(), tf_floatx(), tf_floatx()),
                name='forward_predict_map'
            )
        )
        # TODO: there is a squeeze here. Sometimes we expand, sometimes we squeeze.
        # Unify the approach to avoid unnecessary operations.
        means_t_given_tm1 = tf.squeeze(means_t_given_tm1, axis=1)
        return (
            multiply_gaussians(means_t_given_tm1, precs_t_t_given_tm1,
                               encoding_means_t, encoding_precs_t),
            (means_t_given_tm1, covs_t_t_given_tm1, precs_t_t_given_tm1),
            covs_t_tm1_given_tm1
        )
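`multiply_gaussians` is defined elsewhere in the project; as a hedged sketch (not the project's actual implementation), the product of N(m1, P1^-1) and N(m2, P2^-1) has precision P1 + P2 and mean (P1 + P2)^-1 (P1 m1 + P2 m2):

    def multiply_gaussians(means_1, precs_1, means_2, precs_2):
        # Batched product of Gaussian densities in information form (assumed signature)
        precs = precs_1 + precs_2
        covs = tf.linalg.inv(precs)
        natural_means = (tf.einsum('nij,nj->ni', precs_1, means_1) +
                         tf.einsum('nij,nj->ni', precs_2, means_2))
        means = tf.einsum('nij,nj->ni', covs, natural_means)
        return means, covs, precs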
Example #21
def get_breaked_loss(y_input,
                     y_target,
                     gm,
                     gamma=1.0,
                     initial_state=None,
                     effective_nb_timesteps=None,
                     kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx()),
                     clip_value=None):
    sde_nat_grads, breaked_loss, loss, final_state = gm.nat_grads(
        y_input,
        y_target,
        training=False,
        initial_state=initial_state,
        effective_nb_timesteps=effective_nb_timesteps,
        kl_weight=kl_weight)
    # NOTE: gamma, clip_value and the natural gradients are unused; this helper only
    # evaluates the (breaked) loss
    return loss, breaked_loss, final_state
Example #22
    def loss(self,
             y_input,
             y_target,
             training=None,
             initial_state=None,
             effective_nb_of_timesteps=None,
             kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx())):

        if effective_nb_of_timesteps is None:
            effective_nb_of_timesteps = y_target.shape[1]
        return self.__call__(y_input,
                             y_target,
                             training=training,
                             initial_state=initial_state,
                             effective_nb_timesteps=effective_nb_of_timesteps,
                             kl_weight=kl_weight)[-2:]
Example #23
 def nat_grads(self,
               y_input,
               y_target,
               training,
               initial_state,
               effective_nb_timesteps,
               kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx())):
     (samples, entropies,
      _), encoded_dist, decoded_dist, states = self._encode_and_decode(
          y_input, training=training, initial_state=initial_state)
     breaked_loss = self._breaked_loss(y_input, y_target, samples,
                                       entropies, encoded_dist,
                                       decoded_dist, initial_state,
                                       effective_nb_timesteps, kl_weight)
     loss = tf.reduce_sum(breaked_loss)
     natgrads = self.sde_nat_grads(samples, effective_nb_timesteps)
     return natgrads, breaked_loss, loss, states
Example #24
 def test_loss(self, epoch, kl_scheduler):
     if not self.experiment.has_test:
         return 0
     avg_loss = tf.keras.metrics.Mean(name='loss', dtype=tf_floatx())
     kl_weight = kl_scheduler(epoch)
     for y in self.experiment.test_dataset:
         batch_loss = 0
         initial_state = None
         for x_chunk, y_chunk in self.tbptt_chunks_generator(y):
             chunk_loss, initial_state = self.model.loss(
                 x_chunk,
                 y_chunk,
                 training=False,
                 initial_state=initial_state,
                 effective_nb_of_timesteps=self.experiment.effective_len,
                 kl_weight=kl_weight)
             batch_loss += chunk_loss
         avg_loss.update_state(batch_loss)
     return avg_loss.result()
Example #25
 def _compute_filtering_gammas(self, mean_tm1, cov_tm1, sqe_terms: _SqeAuxTems):
     """
     Deisenroth, 23 and 24
     Definition just after 4.29 of the PhD thesis
     """
     det_term = tf.map_fn(
         lambda x: 1 / tf.sqrt(tf.linalg.det(x)),
         (
                 tf.einsum('ij,ajk->aik', cov_tm1, sqe_terms.inv_Lambdas) +
                 tf.expand_dims(tf.eye(self.sde.dimension, dtype=tf_floatx()), 0)
         )
     )
     det_term = tf.expand_dims(det_term, 1)
     zeta = mean_tm1 - self.sde.iv_values()
     exp_term = tf.exp(-0.5 * tf.reduce_sum(
         tf.tensordot(zeta, tf.linalg.inv(cov_tm1 + sqe_terms.Lambdas), [[1], [1]]) *
         tf.expand_dims(zeta, 1),
         axis=2
     ))
     return sqe_terms.variances * det_term * tf.transpose(exp_term)
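In formulas, the quantity computed above is (reconstructed from the code; a indexes output dimensions, with one lengthscale matrix Lambda_a per output, and z_i are the inducing inputs):

    \gamma_{a,i} = \sigma_a^2 \,\bigl|\Sigma_{t-1}\Lambda_a^{-1} + I\bigr|^{-1/2}
                   \exp\!\Bigl(-\tfrac{1}{2}\,\zeta_i^\top(\Sigma_{t-1}+\Lambda_a)^{-1}\zeta_i\Bigr),
    \qquad \zeta_i = \mu_{t-1} - z_i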
Example #26
    def __init__(self,
                 units_list,
                 output_dim,
                 bidirectional=BIDIRECTIONAL,
                 use_scale_network=False,
                 tie_scale=False):
        super().__init__()
        # TODO
        conv_units_list = [32]  # [32, 64]
        self.tie_scale = tie_scale
        self.use_scale_network = use_scale_network
        self.kernel_size = 0
        self.dilation_rate = 0
        self.output_dim = output_dim

        # TODO: tie scale not used
        embedding_dim = output_dim
        self.embedding = DynEmbedding(units_list, embedding_dim, bidirectional,
                                      None)

        with tf.name_scope("output"):
            self.mean_output_layer = tf.keras.layers.TimeDistributed(
                Mlp([128], output_dim, dropout=0.0))
            if self.use_scale_network:
                # Note the dependency of the output_dim on tie_scale
                self.scale_output_layer = tf.keras.layers.TimeDistributed(
                    Mlp([128],
                        1 if tie_scale else output_dim,
                        dropout=0.0,
                        activation='softplus'))
            else:
                transform = gpflow.utilities.positive()
                self.scale = gpflow.Parameter(
                    0.1 *
                    tf.ones(1 if tie_scale else output_dim, dtype=tf_floatx()),
                    transform=transform,
                    name='encoder_scale')
Example #27
def tbptt_chunks_generator(data_,
                           len_tbptt,
                           time_lag,
                           kernel_size,
                           dilation_rate,
                           noise_std=0,
                           do_bursts=True):

    target, data = build_delay_space(data_, 5, time_lag)
    prediction_lag = 0
    len_tbptt = min(data.shape[1], len_tbptt)
    do_bursts = False  # NOTE: this unconditionally overrides the do_bursts argument
    # nb_drop is the number of samples the (dilated) convolution removes, so that both the
    # lagged version and the target signal can be generated from the original data
    nb_drop = (kernel_size - 1) * dilation_rate
    nb_chunks = int(
        np.floor((data.shape[1] - max(prediction_lag + nb_drop, nb_drop)) /
                 len_tbptt))
    if nb_chunks < 1:
        raise RuntimeError('Cannot generate chunks in tbptt')
    for i in range(nb_chunks):
        # Add nb_drop extra samples to compensate for the samples the convolution removes
        inputs = data[:, (i * len_tbptt):((i + 1) * len_tbptt + nb_drop), :]
        # FIXME
        # output = tf.concat([
        #     data[:, (i * len_tbptt + nb_drop):((i + 1) * len_tbptt + nb_drop), :],
        #     data[:, (i * len_tbptt + prediction_lag + nb_drop):((i + 1) * len_tbptt + prediction_lag + nb_drop), :],
        # ], axis=-1)
        # output = output[:, nb_drop:(-nb_drop), :]
        output = target[:, (i * len_tbptt):((i + 1) * len_tbptt + nb_drop), :]
        # TODO: noise
        if do_bursts:
            inputs = add_bursts(inputs)
        elif noise_std > 0:
            inputs = inputs + tf.random.normal(
                inputs.shape, stddev=noise_std, dtype=tf_floatx())
        yield inputs, output
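A hypothetical usage sketch (shapes chosen for illustration):

    data = tf.random.normal((8, 2000, 1), dtype=tf_floatx())
    for inputs, output in tbptt_chunks_generator(data, len_tbptt=250, time_lag=10,
                                                 kernel_size=1, dilation_rate=1):
        ...  # run one truncated-BPTT step on (inputs, output)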
Example #28
def optimize_sde_with_nat_grad(y_input,
                               y_target,
                               gm,
                               gamma=1.0,
                               initial_state=None,
                               effective_nb_timesteps=None,
                               kl_weight=tf.convert_to_tensor(
                                   1.0, dtype=tf_floatx()),
                               clip_value=None):
    sde_nat_grads, breaked_loss, loss, final_state = gm.nat_grads(
        y_input,
        y_target,
        training=False,
        initial_state=initial_state,
        effective_nb_timesteps=effective_nb_timesteps,
        kl_weight=kl_weight)
    if clip_value:
        sde_nat_grads = [
            tf.clip_by_value(nat_grad, -clip_value, clip_value)
            for nat_grad in sde_nat_grads
        ]

    # Take a step of size gamma in the natural-parameter space, then map the result
    # back to standard parameters before assigning
    thetas = SdeModel.standard_to_natural_params([
        gm.sde_model.diffusion._alphas, gm.sde_model.diffusion._betas,
        gm.sde_model.drift_svgp.q_mu, gm.sde_model.drift_svgp.q_sqrt
    ])
    new_xis = SdeModel.natural_to_standard_params(
        (thetas[0] - gamma * sde_nat_grads[0],
         thetas[1] - gamma * sde_nat_grads[1],
         thetas[2] - gamma * sde_nat_grads[2],
         thetas[3] - gamma * sde_nat_grads[3]))
    gm.sde_model.diffusion._alphas.assign(new_xis[0])
    gm.sde_model.diffusion._betas.assign(new_xis[1])
    gm.sde_model.drift_svgp.q_mu.assign(new_xis[2])
    gm.sde_model.drift_svgp.q_sqrt.assign(new_xis[3])

    return loss, breaked_loss, final_state
Example #29
def optimize_nnets_and_hpars(y_input,
                             y_target,
                             optimizer,
                             gm: VAE,
                             initial_state=None,
                             effective_nb_timesteps=None,
                             kl_weight=tf.convert_to_tensor(
                                 1.0, dtype=tf_floatx())):
    # `nnet_and_hpar_vars` avoids shadowing the builtin vars()
    nnet_and_hpar_vars = (gm.encoder.trainable_variables +
                          list(gm.decoder.trainable_variables) +
                          list(gm.sde_model.hyperpars))
    with tf.GradientTape(persistent=False,
                         watch_accessed_variables=False) as tape:
        tape.watch(nnet_and_hpar_vars)
        (samples, entropies, sampling_dist
         ), encoded_dist, decoded_dist, final_state = gm._encode_and_decode(
             y_input, training=True, initial_state=initial_state)
        bloss = gm._breaked_loss(y_input, y_target, samples, entropies,
                                 encoded_dist, decoded_dist, initial_state,
                                 effective_nb_timesteps, kl_weight)
        loss = tf.reduce_sum(bloss)
    grads = tape.gradient(loss, nnet_and_hpar_vars)
    optimizer.apply_gradients(zip(grads, nnet_and_hpar_vars))
    return loss, final_state
Example #30
    def _breaked_loss(self,
                      y_input,
                      y_target,
                      samples,
                      entropies,
                      encoded_dist,
                      decoded_dist,
                      initial_state,
                      effective_nb_timesteps,
                      kl_weight=tf.convert_to_tensor(1.0, dtype=tf_floatx())):
        # This implements Eq. (4.17) from the thesis (a part of which is detailed in Eq. (4.18))

        _, x0_stats = self._unzip_initial_state(initial_state)
        x0_mean, x0_scale = self._handle_x0_stats(x0_stats, encoded_dist)

        (decoded_means, decoded_scales_diag) = decoded_dist
        # Reduce all the entropies (one per batch) taking into account the effective_nb_timesteps
        reduced_entropy = (effective_nb_timesteps /
                           y_target.shape[1]) * tf.reduce_mean(entropies)

        ly = self._loglikelihood_y_given_x(y_target, decoded_means,
                                           decoded_scales_diag,
                                           effective_nb_timesteps)
        lx, mpenalty, alphaterm, [lxs,
                                  lx0s] = self._variational_loglikelihood_x(
                                      samples, x0_mean, x0_scale,
                                      effective_nb_timesteps)
        kl = self.sde_model.kullback_leibler(self.minimum_nats)

        # Note the minus signs so that this is a loss (rather than a lower bound!)
        # TODO: kl is 0
        return tf.stack([
            -ly, -kl_weight * lx, -kl_weight * mpenalty,
            -kl_weight * alphaterm, -kl_weight * reduced_entropy,
            kl_weight * kl
        ])
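Read off the stacked terms, the maximized bound is (a hedged reconstruction from the code above; beta denotes kl_weight, H the reduced entropy):

    \mathcal{L} = \ell_y + \beta\bigl(\ell_x + m_{\text{penalty}} + \alpha_{\text{term}}
                  + \mathbb{H}[q] - \mathrm{KL}\bigr)

The function returns the negated summands, so minimizing their sum maximizes the bound.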