Example #1
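These snippets are excerpts from a larger codebase and assume the usual module-level imports (math, numpy as np, tensorflow as tf, tensorflow_probability as tfp with tfd = tfp.distributions) plus helpers defined elsewhere in the repository (CodingError, AUX_RATIO_POWER_LAW, get_auxiliary_coder, get_auxiliary_target, get_conditional_target, get_conditional_coder, get_t_p_mass, stateless_gumbel_sample).
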
    def get_auxiliary_ratio(self, index):
        if self.extrapolate_auxiliary_ratios:
            return np.power(index + 1., AUX_RATIO_POWER_LAW)
        else:
            if not self._initialized:
                raise CodingError("Coder has not been initialized yet, please call "
                                  "update_auxiliary_variance_ratios() first"
                                  " or use extrapolation")
            if index >= tf.shape(self.aux_variable_variance_ratios)[0]:
                raise CodingError("KL divergence higher than auxiliary variables can account for. "
                                  "Update auxiliary variable ratios with high-enough KL divergence. "
                                  "Maximum possible number of partitions is {}. "
                                  "Requested {}".format(self.aux_variable_variance_ratios.shape[0], index + 1))
            return self.aux_variable_variance_ratios[index]
def get_r_pstar(log_ratios, t_mass, p_mass, r_buffer_size, dtype=tf.float32):
    t_mass = tf.cast(t_mass, dtype=tf.float64)
    p_mass = tf.cast(p_mass, dtype=tf.float64)
    ratios_np = tf.exp(log_ratios).numpy()
    t_cummass_np = tf.exp(tf.math.cumulative_logsumexp(t_mass)).numpy()
    p_cummass_np = tf.exp(tf.math.cumulative_logsumexp(p_mass)).numpy()
    p_zero = float(1. - np.exp(tf.reduce_logsumexp(p_mass)))
    pstar_buffer = tf.Variable(tf.zeros((r_buffer_size, ), dtype=dtype),
                               trainable=False)
    r_buffer = tf.Variable(tf.zeros((r_buffer_size, ), dtype=dtype),
                           trainable=False)
    r = 1.
    r_buffer[0].assign(r)
    i = 1
    for r_ind, r_next in enumerate(ratios_np):
        if r_next < r:
            continue
        p_cum = p_zero + (p_cummass_np[r_ind - 1] if r_ind > 0 else 0.)
        t_cum = t_cummass_np[r_ind - 1] if r_ind > 0 else 0.
        # For the final sample, r_next should equal (1 - t_cum) / (1 - p_cum), so the
        # log term in the interval formula below would be -infinity; that case is handled separately.
        assert (r_ind != ratios_np.shape[0] - 1
                or math.isclose(r_next, (1. - t_cum) / (1. - p_cum),
                                rel_tol=1e-5))
        if r_ind == ratios_np.shape[0] - 1:
            interval = r_buffer_size - i
        else:
            interval = min(
                r_buffer_size - i,
                int(
                    math.ceil(
                        np.log((r_next - (1. - t_cum) / (1. - p_cum)) /
                               (r - (1. - t_cum) / (1. - p_cum))) /
                        np.log(p_cum))))

        # Work in log for numerical stability
        r_slice = -tf.exp(
            np.log(p_cum) * (1. + tf.range(interval, dtype=dtype)) + np.log(
                (1. - t_cum) / (1. - p_cum) - r)) + (1. - t_cum) / (1. - p_cum)
        r_buffer[i:i + interval].assign(r_slice)
        pstar_buffer[i - 1:i + interval -
                     1].assign((1. - p_cum) *
                               r_buffer[i - 1:i + interval - 1] + t_cum)
        r = np.power(p_cum,
                     interval) * (r - (1. - t_cum) /
                                  (1. - p_cum)) + (1. - t_cum) / (1. - p_cum)
        i += interval
        if i == r_buffer_size:
            pstar_buffer[r_buffer_size - 1].assign((1. - p_cum) * r + t_cum)
            break
    else:
        raise CodingError(
            'R Buffer incomplete after processing all samples. This is a bug.'
        )
    return r_buffer, pstar_buffer
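
For reference, this is the per-step recursion that the vectorized buffer assignments above implement, restated directly from the code (with T = t_cum, P = p_cum and r_0 = 1):

    p*_j = (1 - P) * r_j + T        r_{j+1} = P * r_j + (1 - T)
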
Example #3
    def merge(self, *args, shape=None, seed=42):
        """
        Inverse operation to split
        :return:
        """

        if shape is None:
            raise CodingError("Shape cannot be None!")

        # We first merge the blocks back
        tensors = [tf.concat(blocks, axis=0) for blocks in args]

        # Check that all tensors have the same shape now
        num_dims = tensors[0].shape[0]

        for tensor in tensors:
            if tf.rank(tensor) != 1:
                raise CodingError("All supplied tensors to merge must be rank 1!")

            if tensor.shape[0] != num_dims:
                raise CodingError("All tensors must have the same number of dimensions!")

        # We will inverse permute the indices and gather using them
        # to ensure that every block is un-shuffled the same way
        tf.random.set_seed(seed)
        indices = tf.range(num_dims, dtype=tf.int64)
        indices = tf.random.shuffle(indices)
        indices = tf.math.invert_permutation(indices)[:, None]

        tensors = [tf.gather_nd(tensor, indices)
                   for tensor in tensors]

        # Reshape each tensor appropriately
        tensors = [tf.reshape(tensor, shape) for tensor in tensors]

        return tensors
Example #4
    def split(self, *args, seed=42):
        """
        Splits the arguments into conformal blocks
        :return:
        """

        tensor_shape = args[0].shape
        num_tensors = len(args)

        flattened = []

        # Check if the shapes are alright
        for tensor in args:
            if tensor.shape != tensor_shape:
                raise CodingError("All tensor arguments supplied to split "
                                  "must have the same batch dimensions!")

            flattened.append(tf.reshape(tensor, [-1]))

        # Total number of dimensions for each tensor
        num_dims = flattened[0].shape[0]

        # We will permute the indices and gather using them to ensure that every block is
        # shuffled the same way
        tf.random.set_seed(seed)
        indices = tf.range(num_dims, dtype=tf.int64)
        indices = tf.random.shuffle(indices)[:, None]

        # Shuffle each tensor the same way
        flattened = [tf.gather_nd(flat, indices) for flat in flattened]

        # Split tensors into blocks

        # Calculate the number of blocks
        num_blocks = num_dims // self.block_size
        num_blocks += (0 if num_dims % self.block_size == 0 else 1)

        all_blocks = []
        for tensor in flattened:

            blocks = []
            for i in range(0, num_dims, self.block_size):
                # The minimum ensures that we do not get indices out of bounds
                blocks.append(tensor[i:min(i + self.block_size, num_dims)])

            all_blocks.append(blocks)

        return all_blocks
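
A minimal round-trip sketch for split and merge, assuming the two methods above live on a coder object that exposes a block_size attribute (the BlockCoder name below is purely illustrative):

coder = BlockCoder(block_size=4)  # hypothetical class providing split/merge and block_size

x = tf.reshape(tf.range(10, dtype=tf.float32), [2, 5])
y = tf.reshape(tf.range(10, 20, dtype=tf.float32), [2, 5])

# split shuffles both tensors with the same seeded permutation and cuts them into rank-1 blocks
x_blocks, y_blocks = coder.split(x, y, seed=0)

# merge concatenates the blocks, inverts the permutation and restores the requested shape
x_rec, y_rec = coder.merge(x_blocks, y_blocks, shape=[2, 5], seed=0)

# With matching seeds, merge exactly inverts split
assert tf.reduce_all(x_rec == x)
assert tf.reduce_all(y_rec == y)
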
Example #5
    def encode_block(self,
                     target_dist,
                     coding_dist,
                     seed,
                     update_sampler=False,
                     numpy=True):
        if target_dist.loc.shape[0] != 1:
            raise CodingError("For encoding, batch size must be 1.")

        total_kl = tf.reduce_sum(tfd.kl_divergence(target_dist, coding_dist))
        print('Encoding latent variable with KL={}'.format(total_kl))
        num_aux_variables = tf.cast(
            tf.math.ceil(total_kl / self.kl_per_partition), tf.int32)

        # We iterate backward over all auxiliary variables, from the coarsest
        # partition (index num_aux_variables - 1) down to index 0, refining the beams at each step.
        n_dims = len(target_dist.loc.shape)
        cumulative_auxiliary_variance = 0.
        iteration = 0
        for i in range(num_aux_variables - 1, -1, -1):
            aux_variable_variance_ratio = self.get_auxiliary_ratio(i)
            auxiliary_var = aux_variable_variance_ratio * (tf.math.pow(
                coding_dist.scale, 2) - cumulative_auxiliary_variance)

            auxiliary_coder = get_auxiliary_coder(coder=coding_dist,
                                                  auxiliary_var=auxiliary_var)
            cumulative_auxiliary_coder = get_auxiliary_coder(
                coder=coding_dist,
                auxiliary_var=auxiliary_var + cumulative_auxiliary_variance)
            auxiliary_target = get_auxiliary_target(
                target=target_dist,
                coder=coding_dist,
                auxiliary_var=auxiliary_var + cumulative_auxiliary_variance)

            if iteration > 0:
                samples = self.get_pseudo_random_sample(
                    auxiliary_coder, self.n_samples, beam_indices,
                    seed + iteration)
                combined_samples = beams + samples  # n_samples x n_beams x sample_shape
                log_probs = tf.reduce_sum(
                    auxiliary_target.log_prob(combined_samples) -
                    cumulative_auxiliary_coder.log_prob(combined_samples),
                    axis=range(2, n_dims + 2))
                flat_log_probs = tf.reshape(log_probs, [-1])
                sorted_ind_1d = tf.argsort(flat_log_probs,
                                           direction='DESCENDING')
                n_current_beams = beams.shape[0]
                best_ind_beam = sorted_ind_1d[:self.n_beams] % n_current_beams
                best_ind_aux = sorted_ind_1d[:self.n_beams] // n_current_beams
                assert (log_probs[best_ind_aux[0], best_ind_beam[0]] ==
                        flat_log_probs[sorted_ind_1d[0]])

                beam_ind = tf.stack((best_ind_aux, best_ind_beam), axis=1)
                beams = tf.gather_nd(combined_samples, beam_ind)
                beam_indices = tf.concat((tf.gather_nd(
                    beam_indices[:, :iteration],
                    best_ind_beam[:, None]), best_ind_aux[:, None]),
                                         axis=1)
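                # beam_indices keeps, for each surviving beam, the auxiliary-sample index
                # chosen at every refinement step so far (shape: n_beams x (iteration + 1)).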
            else:
                samples = self.get_pseudo_random_sample(
                    auxiliary_coder, self.n_samples,
                    tf.constant([[]], dtype=tf.int32), seed + iteration)[:, 0]
                log_probs = tf.reduce_sum(
                    auxiliary_target.log_prob(samples) -
                    cumulative_auxiliary_coder.log_prob(samples),
                    axis=range(1, n_dims + 1))
                sorted_ind = tf.argsort(log_probs, direction='DESCENDING')
                beams = tf.gather_nd(samples, sorted_ind[:self.n_beams, None])
                beam_indices = sorted_ind[:self.n_beams, None]

            iteration += 1
            cumulative_auxiliary_variance += auxiliary_var

        target_sample = target_dist.sample()
        target_entropy = tf.reduce_sum(
            target_dist.log_prob(target_sample) -
            coding_dist.log_prob(target_sample))
        print('Target entropy={}, log_density={}'.format(
            target_entropy,
            tf.reduce_sum(
                target_dist.log_prob(beams[0] + coding_dist.loc) -
                coding_dist.log_prob(beams[0] + coding_dist.loc))))

        indices = beam_indices[0, :]
        if numpy:
            indices = indices.numpy()

        return list(indices), beams[0] + coding_dist.loc
Example #6
    def encode_block(self, target_dist, coding_dist, seed, update_sampler=False, verbose=False, numpy=True):
        if target_dist.loc.shape[0] != 1:
            raise CodingError("For encoding, batch size must be 1.")

        indices = []

        total_kl = tf.reduce_sum(tfd.kl_divergence(target_dist, coding_dist))
        print('Encoding latent variable with KL={}'.format(total_kl))
        num_aux_variables = tf.cast(tf.math.ceil(total_kl / self.kl_per_partition), tf.int32)

        # We iterate backward until the second entry in ratios. The first entry is 1.,
        # in which case we just draw the final sample.
        for i in range(num_aux_variables - 1, 0, -1):
            aux_variable_variance_ratio = self.get_auxiliary_ratio(i)
            auxiliary_var = aux_variable_variance_ratio * tf.math.pow(coding_dist.scale, 2)

            auxiliary_target = get_auxiliary_target(target=target_dist,
                                                    coder=coding_dist,
                                                    auxiliary_var=auxiliary_var)

            auxiliary_coder = get_auxiliary_coder(coder=coding_dist,
                                                  auxiliary_var=auxiliary_var)

            if update_sampler:
                self.sampler.update(auxiliary_target, auxiliary_coder)
                auxiliary_sample = auxiliary_target.sample()
                print('Sampler updated')
            else:
                index, auxiliary_sample = self.sampler.coded_sample(target=auxiliary_target,
                                                                    coder=auxiliary_coder,
                                                                    seed=seed)
                if verbose:
                    print(f'Auxiliary sample {i} found at index {index}')

                if numpy:
                    index = index.numpy()

                indices.append(index)
            seed += 1

            target_dist = get_conditional_target(target=target_dist,
                                                 coder=coding_dist,
                                                 auxiliary_var=auxiliary_var,
                                                 auxiliary_sample=auxiliary_sample)

            coding_dist = get_conditional_coder(coder=coding_dist,
                                                auxiliary_var=auxiliary_var,
                                                auxiliary_sample=auxiliary_sample)

        # Sample the last auxiliary variable
        if update_sampler:
            self.sampler.update(target_dist, coding_dist)
            sample = target_dist.sample()
            print('Sampler updated')
        else:
            index, sample = self.sampler.coded_sample(target=target_dist,
                                                      coder=coding_dist,
                                                      seed=seed)
            if verbose:
                print('Auxiliary sample found at index {}'.format(index))

            if numpy:
                index = index.numpy()

            indices.append(index)

        return indices, sample
def encode_gaussian_importance_sample(t_loc,
                                      t_scale,
                                      p_loc,
                                      p_scale,
                                      coding_bits,
                                      seed,
                                      log_weighting_fn=None,
                                      alpha=float('inf')):
    """
    Encodes a single sample from a Gaussian target distribution using another Gaussian coding distribution.
    Note that the runtime of this function is O(e^KL(q || p)), hence it is the job of the caller to potentially
    partition a larger Gaussian into smaller codable chunks.

    :param t_loc: location parameter of the target Gaussian
    :param t_scale: scale parameter of the target Gaussian
    :param p_loc: location parameter of the coding/proposal Gaussian
    :param p_scale: scale parameter of the coding/proposal Gaussian
    :param coding_bits: number of bits to use to code each sample
    :param seed: seed that defines the infinite string of random samples from the coding distribution.
    :param log_weighting_fn: optional override for computing the log importance weights of the drawn samples;
    if None, the log density ratio between the standardized target and the proposal is used.
    :param alpha: draw the index according to the L_alpha norm. alpha=1 results in sampling the atomic distribution
    defined by the importance weights, and alpha=inf just selects the sample with the maximal importance weight. Must
    be in the range [1, inf].
    :return: (index, sample) - tuple containing the index of the selected sample and the sample itself
    """

    if alpha < 1.:
        raise CodingError(
            f"Alpha must be in the range [1, inf), but {alpha} was given!")

    # Fix seed
    tf.random.set_seed(seed)

    # Standardize the target w.r.t the coding distribution
    t_loc = (t_loc - p_loc) / p_scale
    t_scale = t_scale / p_scale

    target = tfd.Normal(loc=t_loc, scale=t_scale)

    proposal = tfd.Normal(loc=tf.zeros_like(p_loc),
                          scale=tf.ones_like(p_scale))

    # We need approximately e^KL samples to be guaranteed a low-bias sample;
    # with coding_bits roughly KL / ln(2), this corresponds to 2^coding_bits samples.
    num_samples = tf.cast(tf.math.ceil(tf.exp(coding_bits * tf.math.log(2.))),
                          tf.int32)

    # Draw 2^coding_bits samples from the proposal
    samples = proposal.sample(num_samples)

    # Calculate the unnormalized log importance weights
    if log_weighting_fn is None:
        log_importance_weights = tf.reduce_sum(
            target.log_prob(samples) - proposal.log_prob(samples),
            axis=range(1,
                       tf.rank(t_loc) + 1))
    else:
        log_importance_weights = log_weighting_fn(samples)

    # If we are using the infinity norm, we can just take the argmax as a shortcut
    if tf.math.is_inf(alpha):
        index = tf.argmax(log_importance_weights)

    # If we are using any other alpha, we just calculate the atomic distribution
    else:
        # Sample index using the Gumbel-max trick
        perturbed = alpha * log_importance_weights + stateless_gumbel_sample(
            log_importance_weights.shape, seed + 1)
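        # Gumbel-max trick: adding independent standard Gumbel noise to
        # alpha * log w_i and taking the argmax draws index i with probability
        # proportional to w_i^alpha, interpolating between sampling the normalized
        # importance weights (alpha = 1) and a hard argmax (alpha -> inf).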

        index = tf.argmax(perturbed)

    chosen_sample = samples[index, ...]

    # Rescale the sample
    chosen_sample = p_scale * chosen_sample + p_loc

    return index, chosen_sample
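
A minimal usage sketch for encode_gaussian_importance_sample (the values below are purely illustrative; assumes eager execution and the module-level imports noted at the top):

# Tiny 2-dimensional example: target slightly offset from a standard-normal coder.
t_loc = tf.constant([0.4, -0.3])
t_scale = tf.constant([0.8, 0.9])
p_loc = tf.zeros([2])
p_scale = tf.ones([2])

# coding_bits=8. means 2^8 = 256 proposal samples are drawn and scored.
index, sample = encode_gaussian_importance_sample(t_loc, t_scale, p_loc, p_scale,
                                                  coding_bits=8., seed=0)
# `index` identifies the chosen sample within the seeded stream of proposal draws,
# so a decoder re-drawing the same 256 samples with seed=0 can recover `sample`.
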
def gaussian_rejection_sample_small(t_dist,
                                    p_dist,
                                    sample_buffer_size,
                                    r_buffer_size,
                                    sample_generator,
                                    seed=42069):
    """
    Encodes a single sample from a Gaussian target distribution using another Gaussian coding distribution.
    Note that the runtime of this function is O(e^KL(q || p)), hence it is the job of the caller to potentially
    partition a larger Gaussian into smaller codable chunks.

    :param t_dist: the target Gaussian
    :param p_dist: the coding/proposal Gaussian
    :param sample_buffer_size: number of proposal samples drawn and tested per iteration
    :param r_buffer_size: buffer size of the rejection sampler; samples beyond this index are treated as if they
     were drawn at the last buffered index
    :param sample_generator: generator that supplies the seeded stream of proposal samples and their log density ratios
    :param seed: seed that defines the infinite string of random samples from the coding distribution.
    :return: (index, sample) - tuple containing the index of the accepted sample and the sample itself
    """
    assert (r_buffer_size % sample_buffer_size == 0)
    assert t_dist.loc.shape.as_list() == p_dist.loc.shape.as_list()
    log_ratios, t_mass, p_mass = get_t_p_mass(t_dist,
                                              p_dist,
                                              n_samples=100,
                                              oversampling=100)
    r_buffer, pstar_buffer = get_r_pstar(log_ratios,
                                         t_mass,
                                         p_mass,
                                         r_buffer_size=r_buffer_size)
    kl = tf.reduce_sum(tfp.distributions.kl_divergence(t_dist, p_dist))
    if kl >= 20.:
        raise CodingError(
            'KL divergence={} is too high for rejection sampling'.format(kl))

    i = 0
    for _ in range(int(r_buffer_size // sample_buffer_size)):
        sample_ratios = sample_generator.get_ratios(t_dist,
                                                    p_dist,
                                                    seed=seed +
                                                    i // sample_buffer_size)
        accepted = (tf.exp(sample_ratios) - r_buffer[i:i+sample_buffer_size]) / \
                   (1. - pstar_buffer[i:i+sample_buffer_size]) + tf.random.uniform(shape=sample_ratios.shape)
        accepted_ind = tf.where(accepted > 0.)
        if accepted_ind.shape[0] > 0:
            index = int(accepted_ind[0, 0])
            return i + index, sample_generator.get_index(index)
        i += sample_buffer_size

    # If not finished in buffer, we accept anything above ratio r
    r = r_buffer[-1]
    while True:
        sample_ratios = sample_generator.get_ratios(t_dist,
                                                    p_dist,
                                                    seed=seed +
                                                    i // sample_buffer_size)
        accepted_ind = tf.where(sample_ratios > tf.math.log(r))
        if accepted_ind.shape[0] > 0:
            index = int(accepted_ind[0, 0])
            return i + index, sample_generator.get_index(index)
        else:
            i += sample_buffer_size