def _variance(self):
  """Computes the variance, guarding the region where it is undefined.

  The expression below is only used as the variance where
  `mixing_concentration > 2`. With `allow_nan_stats` set, every other batch
  member is replaced by NaN; otherwise the result is returned behind an
  assertion that `mixing_concentration` exceeds 2.

  Returns:
    A `Tensor` holding the variance (or NaN where undefined).
  """
  ratio = (
      self.concentration * self.mixing_rate / (self.mixing_concentration - 1.))
  variance = tf.square(ratio) / (self.mixing_concentration - 2.)

  if not self.allow_nan_stats:
    exceeds_two = assert_util.assert_less(
        tf.ones([], self.dtype) * 2.,
        self.mixing_concentration,
        message="variance undefined when `mixing_concentration` <= 2")
    return distribution_util.with_dependencies([exceeds_two], variance)

  # NaN fill has the batch shape and the distribution's own dtype.
  nan_value = dtype_util.as_numpy_dtype(self.dtype)(np.nan)
  nan = tf.fill(self.batch_shape_tensor(), nan_value, name="nan")
  return tf.where(self.mixing_concentration > 2., variance, nan)
def compress(condition, a, axis=None):
  """Selects the entries of `a` along `axis` for which `condition` is true.

  Implemented with `tf.boolean_mask`.

  Args:
    condition: 1-d array of bools. If it is shorter than the selected axis of
      `a` (or than the flattened array when `axis` is None), it is padded with
      False.
    a: array_like. May be an ndarray, a Tensor, or anything convertible to a
      Tensor with `tf.convert_to_tensor`.
    axis: Optional. Axis along which to select elements. When None, `condition`
      is applied to the flattened array.

  Returns:
    An ndarray.

  Raises:
    ValueError: if `condition` is not of rank 1.
  """
  mask = asarray(condition, dtype=bool)
  arr = asarray(a)

  if mask.ndim != 1:
    raise ValueError('condition must be a 1-d array.')

  # `np.compress` treats scalars as 1-d arrays.
  if arr.ndim == 0:
    arr = ravel(arr)

  if axis is None:
    arr = ravel(arr)
    axis = 0

  if axis < 0:
    axis += arr.ndim

  assert 0 <= axis < arr.ndim

  # `tf.boolean_mask` needs the mask length to equal the size of the masked
  # dimension, whereas `np.compress` implicitly pads a short condition with
  # False; emulate that padding here.
  mask_t = mask.data
  arr_t = arr.data
  if mask.shape[0] < arr.shape[axis]:
    false_tail = tf.fill([arr.shape[axis] - mask.shape[0]], False)
    mask_t = tf.concat([mask_t, false_tail], axis=0)

  selected = tf.boolean_mask(tensor=arr_t, mask=mask_t, axis=axis)
  return utils.tensor_to_ndarray(selected)
def _mean(self):
  """Returns `loc` broadcast to the sample shape, guarded on `df > 1`.

  With `allow_nan_stats` set, entries whose `df` is not greater than 1 are
  replaced by NaN; otherwise the mean is returned behind an assertion that
  every `df` exceeds 1.
  """
  mean = _broadcast_to_shape(self.loc, self._sample_shape())
  mean_shape = tf.shape(input=mean)
  # Give `df` a trailing axis so that it lines up with `mean`.
  df = _broadcast_to_shape(self.df[..., tf.newaxis], mean_shape)

  if not self.allow_nan_stats:
    df_exceeds_one = assert_util.assert_less(
        tf.cast(1., self.dtype),
        df,
        message="mean not defined for components of df <= 1")
    with tf.control_dependencies([df_exceeds_one]):
      return tf.identity(mean)

  nan = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  nan_fill = tf.fill(mean_shape, nan, name="nan")
  return tf.where(df > 1., mean, nan_fill)
def _mode(self):
  """Returns the mode, guarded on every concentration exceeding 1.

  With `allow_nan_stats` set, batch members with any concentration that is
  not greater than 1 yield NaN; otherwise the mode is returned behind an
  assertion on the concentrations.
  """
  event_size = tf.cast(self.event_shape_tensor()[0], self.dtype)
  denominator = self.total_concentration[..., tf.newaxis] - event_size
  mode = (self.concentration - 1.) / denominator

  if not self.allow_nan_stats:
    above_one = assert_util.assert_less(
        tf.ones([], self.dtype),
        self.concentration,
        message="Mode undefined when any concentration <= 1")
    return distribution_util.with_dependencies([above_one], mode)

  nan_value = np.array(np.nan, dtype=self.dtype.as_numpy_dtype())
  nan = tf.fill(tf.shape(input=mode), nan_value, name="nan")
  # Reduce over the last axis: one bad component invalidates that member.
  is_defined = tf.reduce_all(input_tensor=self.concentration > 1., axis=-1)
  return tf.where(is_defined, mode, nan)
def _initial_discount_rates(bond_cashflows,
                            bond_cashflow_times,
                            present_values,
                            name='initial_discount_rates'):
  """Builds the initial rate guess from the bonds' yields to maturity.

  Every cashflow is tagged with the index of the bond it belongs to, the
  per-bond tensors are concatenated, and the result is handed to
  `cashflows.yields_from_pv` with those tags as `groups`.
  """
  # One group id per cashflow, shaped like that bond's cashflow tensor.
  group_ids = [
      tf.fill(tf.shape(bond_cashflow), bond_index)
      for bond_index, bond_cashflow in enumerate(bond_cashflows)
  ]
  all_cashflows = tf.concat(bond_cashflows, axis=0)
  all_cashflow_times = tf.concat(bond_cashflow_times, axis=0)
  all_groups = tf.concat(group_ids, axis=0)
  return cashflows.yields_from_pv(
      all_cashflows,
      all_cashflow_times,
      present_values,
      groups=all_groups,
      name=name)
def fill(self, value, size, dtype, shape, name=None):
  """Creates a new batched Tensor whose every entry is `value`.

  Args:
    value: Scalar to fill with.
    size: Scalar `int` `Tensor` specifying the number of VM threads.
    dtype: `tf.DType` of the Tensor to be returned.
    shape: Rank 1 `int` `Tensor`, the per-thread value shape.
    name: Optional name for the op.

  Returns:
    result: `Tensor` of `dtype` `value`s with shape `[size, *shape]`
  """
  scope = name or 'VM.fill'
  with tf.name_scope(scope):
    num_threads = tf.convert_to_tensor(value=size, name='size')
    # The per-thread shape is converted with the dtype of `size` so that the
    # two can be concatenated.
    per_thread_shape = tf.convert_to_tensor(
        value=shape, name='shape', dtype=num_threads.dtype)
    batched_shape = tf.concat([[num_threads], per_thread_shape], axis=0)
    typed_value = tf.cast(value, dtype=dtype)
    return tf.fill(batched_shape, value=typed_value)
def estimate_tail(func, target, shape, dtype):
  """Estimates approximate tail quantiles.

  Starting from zeros, runs Adam (learning rate 0.1) on the element-wise loss
  `abs(func(x) - target)`. Iteration continues for as long as at least one
  element's latest loss is equal to the lowest loss recorded for it so far,
  and stops once every element's latest loss is worse than its recorded best.

  Args:
    func: Callable applied to the current estimate.
    target: Value that `func` should attain; converted to `dtype`.
    shape: Shape of the estimate; converted to an `int32` tensor.
    dtype: Data type of the estimate.

  Returns:
    The value of the estimate variable after the final update.
  """
  dtype = tf.as_dtype(dtype)
  shape = tf.convert_to_tensor(shape, tf.int32)
  target = tf.convert_to_tensor(target, dtype)

  optimizer = tf.keras.optimizers.Adam(learning_rate=.1)
  estimate = tf.Variable(
      tf.zeros(shape, dtype=dtype), trainable=False, name="tails")

  # Both losses start at +inf so that the loop body runs at least once.
  infinity = tf.constant(float("inf"), dtype=dtype)
  current_loss = lowest_loss = tf.fill(shape, infinity)

  while tf.reduce_any(current_loss == lowest_loss):
    with tf.GradientTape(watch_accessed_variables=False) as tape:
      tape.watch(estimate)
      current_loss = abs(func(estimate) - target)
    lowest_loss = tf.minimum(lowest_loss, current_loss)
    grads = tape.gradient(current_loss, estimate)
    optimizer.apply_gradients([(grads, estimate)])

  return estimate.value()
def _setup(self):
  """Precomputes the date and daycount tensors used by later computations.

  For every contract in the batch a reset schedule is built, the daycount
  fractions between consecutive schedule dates are computed, and each fraction
  is tagged with the index of the contract it belongs to. The per-contract
  pieces are then concatenated and stored on the instance.
  """
  all_reset_dates = []
  all_daycount_fractions = []
  all_contract_idx = []

  for contract in range(self._batch_size):
    contract_start = self._start_date[contract]
    contract_end = self._end_date[contract]

    periodic_dates = dates.PeriodicSchedule(
        start_date=contract_start + self._rate_tenor,
        end_date=contract_end,
        tenor=self._rate_tenor,
        holiday_calendar=self._holiday_calendar,
        roll_convention=dates.BusinessDayConvention.FOLLOWING).dates()

    # Prepend the contract start date to the periodic schedule.
    schedule = dates.DateTensor.concat(
        [contract_start.expand_dims(axis=0), periodic_dates], axis=0)

    # Append one further tenor period past the end date so that the accrual
    # over the final period of the delivery can be computed.
    past_end = contract_end + self._rate_tenor
    schedule = dates.DateTensor.concat(
        [schedule, past_end.expand_dims(axis=0)], axis=0)

    period_starts = schedule[:-1]
    fractions = rc.get_daycount_fraction(
        period_starts, schedule[1:], self._daycount_convention, self._dtype)

    all_reset_dates.append(period_starts)
    all_daycount_fractions.append(fractions)
    all_contract_idx.append(tf.fill(tf.shape(fractions), contract))

  self._reset_dates = dates.DateTensor.concat(all_reset_dates, axis=0)
  self._accrual_start_dates = self._reset_dates
  self._accrual_end_dates = self._reset_dates + self._rate_tenor
  self._accrual_daycount = rc.get_daycount_fraction(
      self._accrual_start_dates,
      self._accrual_end_dates,
      self._daycount_convention,
      self._dtype)
  self._daycount_fractions = tf.concat(all_daycount_fractions, axis=0)
  self._contract_idx = tf.concat(all_contract_idx, axis=0)
def mutate(current_state, log_scalings, num_steps, inverse_temperature):
  """Mutate the state using a Transition kernel.

  Builds a kernel for the tempered target and applies `kernel.one_step`
  `num_steps` times inside a `tf.while_loop`, accumulating the clipped log
  acceptance ratios in log space.

  Relies on `make_kernel_fn`, `prior_log_prob_fn`, `likelihood_log_prob_fn`,
  `seed_stream` and `parallel_iterations` from the enclosing scope.

  Args:
    current_state: State passed to `make_kernel_fn` and used as the loop's
      starting state.
    log_scalings: Log of the scalings handed to `make_kernel_fn`.
    num_steps: Number of kernel steps to take.
    inverse_temperature: Forwarded to `make_tempered_target_log_prob_fn`.

  Returns:
    Tuple `(next_state, avg_log_accept_prob_per_particle,
    kernel_target_log_prob)`.
  """
  with tf.name_scope('mutate_states'):
    scalings = tf.exp(log_scalings)
    kernel = make_kernel_fn(
        make_tempered_target_log_prob_fn(
            prior_log_prob_fn, likelihood_log_prob_fn, inverse_temperature),
        current_state,
        scalings,
        seed=seed_stream)
    pkr = kernel.bootstrap_results(current_state)
    # Only used below for the shape and dtype of the accumulator.
    kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)

    def mutate_onestep(i, state, pkr, log_accept_prob_sum):
      next_state, next_kernel_results = kernel.one_step(
          state, pkr)
      # NOTE(review): the accept ratio is read from `pkr` (the results passed
      # into this step), not from `next_kernel_results` -- confirm intended.
      kernel_log_accept_ratio, _ = gather_mh_like_result(pkr)
      # Clip at zero so that the value is a log probability.
      log_accept_prob = tf.minimum(kernel_log_accept_ratio, 0.)
      log_accept_prob_sum = log_add_exp(log_accept_prob_sum,
                                        log_accept_prob)
      return i + 1, next_state, next_kernel_results, log_accept_prob_sum

    (
        _,
        next_state,
        next_kernel_results,
        log_accept_prob_sum
    ) = tf.while_loop(
        cond=lambda i, *args: i < num_steps,
        body=mutate_onestep,
        loop_vars=(
            tf.zeros([], dtype=tf.int32),
            current_state,
            pkr,
            # we accumulate the acceptance probability in log space.
            tf.fill(
                ps.shape(kernel_log_accept_ratio),
                tf.constant(-np.inf, kernel_log_accept_ratio.dtype))),
        parallel_iterations=parallel_iterations)
    _, kernel_target_log_prob = gather_mh_like_result(
        next_kernel_results)
    # NOTE(review): the loop adds `num_steps` terms to the sum, but the
    # normalizer below is `num_steps + 1` -- confirm this is intended.
    avg_log_accept_prob_per_particle = log_accept_prob_sum - tf.math.log(
        tf.cast(num_steps + 1, log_accept_prob_sum.dtype))
    return (next_state,
            avg_log_accept_prob_per_particle,
            kernel_target_log_prob)
def _mode(self):
  """Returns the mode, guarded on both concentrations exceeding 1.

  With `allow_nan_stats` set, batch members where either concentration is not
  greater than 1 yield NaN; otherwise the mode is returned behind assertions
  on `concentration1` and `concentration0`.
  """
  mode = (self.concentration1 - 1.) / (self.total_concentration - 2.)

  if not self.allow_nan_stats:
    concentration1_ok = assert_util.assert_less(
        tf.ones([], dtype=self.dtype),
        self.concentration1,
        message="Mode undefined for concentration1 <= 1.")
    concentration0_ok = assert_util.assert_less(
        tf.ones([], dtype=self.dtype),
        self.concentration0,
        message="Mode undefined for concentration0 <= 1.")
    return distribution_util.with_dependencies(
        [concentration1_ok, concentration0_ok], mode)

  nan_value = dtype_util.as_numpy_dtype(self.dtype)(np.nan)
  nan = tf.fill(self.batch_shape_tensor(), nan_value, name="nan")
  is_defined = tf.logical_and(self.concentration1 > 1.,
                              self.concentration0 > 1.)
  return tf.where(is_defined, mode, nan)
def _validate_args_control_deps(bond_cashflows,
                                bond_cashflow_times,
                                pv_settle_times):
  """Builds the assertion ops that validate the bond inputs.

  For each bond three checks are created: cashflow times are strictly
  increasing, the first cashflow time is after the settlement time, and the
  last cashflow is greater than every earlier cashflow.

  Returns:
    A list of assertion ops: all settlement checks, then all monotonicity
    checks, then all final-cashflow checks.
  """
  increasing_checks = []
  settlement_checks = []
  final_largest_checks = []

  for index, cashflow in enumerate(bond_cashflows):
    cashflow_times = bond_cashflow_times[index]

    gaps = cashflow_times[1:] - cashflow_times[:-1]
    increasing_checks.append(tf.debugging.assert_positive(gaps))

    settlement_checks.append(
        tf.debugging.assert_greater(cashflow_times[0],
                                    pv_settle_times[index]))

    # Compare the last cashflow, repeated, against all the earlier ones.
    earlier = cashflow[:-1]
    last_repeated = tf.fill(tf.shape(earlier), cashflow[-1])
    final_largest_checks.append(
        tf.debugging.assert_greater(last_repeated, earlier))

  return settlement_checks + increasing_checks + final_largest_checks
def _mode(self):
  """Returns `(concentration - 1) / rate`, or NaN where `concentration <= 1`.

  When `allow_nan_stats` is False, the result additionally depends on an
  assertion that every concentration exceeds 1.
  """
  concentration = tf.convert_to_tensor(self.concentration)
  rate = tf.convert_to_tensor(self.rate)
  mode = (concentration - 1.) / rate

  assertions = []
  if not self.allow_nan_stats:
    assertions.append(
        assert_util.assert_less(
            tf.ones([], self.dtype),
            concentration,
            message="Mode not defined when any concentration <= 1."))

  with tf.control_dependencies(assertions):
    batch_shape = self._batch_shape_tensor(
        concentration=concentration, rate=rate)
    nan_value = dtype_util.as_numpy_dtype(self.dtype)(np.nan)
    nan = tf.fill(batch_shape, nan_value, name="nan")
    return tf.where(concentration > 1., mode, nan)
def make_prior(num_topics, initial_value):
  """Creates the prior distribution over topics.

  The concentration is a trainable variable of shape `[1, num_topics]`,
  constrained through a softplus bijector and passed through
  `_clip_dirichlet_parameters` each time it is read.

  Args:
    num_topics: Number of topics.
    initial_value: The starting value for the prior parameters.

  Returns:
    prior: A `tfd.Dirichlet` instance named "topics_prior".
  """
  initial_concentration = tf.fill([1, num_topics], initial_value)
  concentration = tfp.util.TransformedVariable(
      initial_concentration, tfb.Softplus(), name="concentration")
  clipped_concentration = tfp.util.DeferredTensor(
      concentration, _clip_dirichlet_parameters)
  return tfd.Dirichlet(
      concentration=clipped_concentration, name="topics_prior")
def test_tril(self, use_default):
  """Samples a correlated multivariate normal with preconditioned NUTS.

  Args:
    use_default: bool; when True the kernel's default momentum distribution
      is used, otherwise a custom one is supplied.
  """
  if tf.executing_eagerly():
    self.skipTest(
        'b/169882656 Too many warnings are issued in eager logs')

  cov = 0.9 * tf.ones([3, 3]) + 0.1 * tf.eye(3)
  target = tfd.MultivariateNormalTriL(
      loc=[1., 2., 3.], scale_tril=tf.linalg.cholesky(cov))

  momentum_distribution = None
  step_size = 0.3
  if not use_default:
    # TODO(b/170015229) Don't use the covariance as inverse scale,
    # it is the wrong preconditioner.
    momentum_distribution = (
        _CompositeMultivariateNormalPrecisionFactorLinearOperator(
            precision_factor=tf.linalg.LinearOperatorFullMatrix(cov)))
    step_size = 1.1

  kernel = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
      target_log_prob_fn=target.log_prob,
      momentum_distribution=momentum_distribution,
      step_size=step_size,
      max_tree_depth=4)
  draws = tfp.mcmc.sample_chain(
      120,
      tf.zeros(3),
      kernel=kernel,
      seed=test_util.test_seed(),
      trace_fn=None)
  ess = tfp.mcmc.effective_sample_size(
      draws[-100:], filter_threshold=0, filter_beyond_positive_pairs=False)

  # TODO(b/170015229): These and other tests like it, which assert ess is
  # greater than some number, were all passing, even though the preconditioner
  # was the wrong one. Why is that? A guess is that since there are *many*
  # ways to have larger ess, these tests don't really test correctness.
  # Perhaps remove all tests like these.
  if use_default:
    self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
  else:
    self.assertAllClose(ess, tf.fill([3], 100.))
def _get_rpn_samples(self, match_results):
  """Computes anchor labels by subsampling foreground/background anchors.

  Args:
    match_results: An integer tensor with shape [N] representing the matching
      results of anchors. (1) match_results[i]>=0, meaning that column i is
      matched with row match_results[i]. (2) match_results[i]=-1, meaning that
      column i is not matched. (3) match_results[i]=-2, meaning that column i
      is ignored.

  Returns:
    A tuple `(score_targets, positive_labels, negative_labels)`:
    score_targets: an integer tensor with shape [N]. (1) score_targets[i]=1,
      the anchor is a positive sample. (2) score_targets[i]=0, negative.
      (3) score_targets[i]=-1, the anchor is don't care (ignore).
    positive_labels: an integer tensor with shape [N] holding 2 at sampled
      positive anchors and 0 elsewhere.
    negative_labels: an integer tensor with shape [N] holding 1 at sampled
      negative anchors and 0 elsewhere.
  """

  def _int_constant(value):
    """Returns an int32 constant of `value` shaped like `match_results`."""
    return tf.constant(value, dtype=tf.int32, shape=match_results.shape)

  sampler = (
      balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
          positive_fraction=self._rpn_fg_fraction, is_static=False))

  # `indicator` marks every anchor that is not ignored (positive or negative),
  # `labels` marks the positive anchors only, hence:
  #   positives = indicator & labels
  #   negatives = indicator & !labels
  #   ignore    = !indicator
  indicator = tf.greater(match_results, -2)
  labels = tf.greater(match_results, -1)
  samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)

  sampled_positive = tf.logical_and(samples, labels)
  sampled_negative = tf.logical_and(samples, tf.logical_not(labels))

  # Added to the -1 baseline below, 2 maps to a score of 1 and 1 to 0.
  positive_labels = tf.where(
      sampled_positive, _int_constant(2), _int_constant(0))
  negative_labels = tf.where(
      sampled_negative, _int_constant(1), _int_constant(0))
  ignore_labels = tf.fill(match_results.shape, -1)

  score_targets = ignore_labels + positive_labels + negative_labels
  return (score_targets, positive_labels, negative_labels)
def test_diag(self, use_default):
  """Checks that a diagonal multivariate normal is sampled effectively.

  With the custom momentum distribution the effective sample size is expected
  to be exactly 100. The step size is tuned well enough that a single HMC
  step carries a point to nearly the antipodal point, giving a negative lag-1
  autocorrelation, and the effective sample size computation stops as soon as
  the autocorrelation falls below zero.

  Args:
    use_default: bool; when True the kernel's default momentum distribution
      is used, otherwise a custom one is supplied.
  """
  target = tfd.MultivariateNormalDiag(
      loc=[1., 2., 3.], scale_diag=[0.1, 1., 10.])

  momentum_distribution = None
  step_size = 0.1
  if not use_default:
    momentum_distribution = (
        tfde.MultivariateNormalPrecisionFactorLinearOperator(
            precision_factor=target.scale))
    step_size = 0.3

  kernel = tfp.experimental.mcmc.PreconditionedHamiltonianMonteCarlo(
      target_log_prob_fn=target.log_prob,
      momentum_distribution=momentum_distribution,
      step_size=step_size,
      num_leapfrog_steps=10)
  draws = tfp.mcmc.sample_chain(
      110,
      tf.zeros(3),
      kernel=kernel,
      seed=test_util.test_seed(),
      trace_fn=None)
  ess = tfp.mcmc.effective_sample_size(
      draws[-100:], filter_threshold=0, filter_beyond_positive_pairs=False)

  if use_default:
    self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
  else:
    self.assertAllClose(ess, tf.fill([3], 100.))
def test_kahan_precision(self, jit=False):
  """Compares a Kahan-summed log-prob against a float64 reference.

  Args:
    jit: bool; when True the log-prob is wrapped in an XLA-compiled
      `tf.function` (the test is skipped when XLA is unavailable).
  """
  if jit:
    self.skip_if_no_xla()
    maybe_jit = tf.function(jit_compile=True)
  else:
    maybe_jit = lambda f: f

  seed_stream = test_util.test_seed_stream()
  num_samples = 20_000
  samples = tfd.Poisson(rate=1.).sample(num_samples, seed=seed_stream())
  # A single random draw, repeated for every sample.
  log_rate = tf.fill(
      [num_samples], tfd.Normal(0, .2).sample(seed=seed_stream()))

  kahan_dist = tfd.Independent(
      tfd.Poisson(log_rate=log_rate),
      reinterpreted_batch_ndims=1,
      experimental_use_kahan_sum=True)
  lp = maybe_jit(kahan_dist.log_prob)(samples)

  reference_dist = tfd.Independent(
      tfd.Poisson(log_rate=tf.cast(log_rate, tf.float64)),
      reinterpreted_batch_ndims=1)
  lp64 = reference_dist.log_prob(tf.cast(samples, tf.float64))

  # Evaluate together to ensure we use the same samples.
  lp, lp64 = self.evaluate((tf.cast(lp, tf.float64), lp64))
  # Fails ~75% CPU, 1-75% GPU --vary_seed runs w/o experimental_use_kahan_sum.
  self.assertAllClose(lp64, lp, rtol=0., atol=.01)
def testMarginalLikelihoodGradientIsDefined(self):
  """Checks the marginal likelihood has a nonzero gradient after two steps.

  The gradient is taken with respect to the transition scale at 1.5.
  """
  num_particles = 16
  init_seed, first_step_seed, second_step_seed = samplers.split_seed(
      test_util.test_seed(), n=3)

  # Start from uniform weights, i.e. log(1 / num_particles) for each particle.
  uniform_log_weight = -tf.math.log(float(num_particles))
  initial_state = self.evaluate(
      WeightedParticles(
          particles=samplers.normal([num_particles], seed=init_seed),
          log_weights=tf.fill([num_particles], uniform_log_weight)))

  def propose_and_update_log_weights_fn(_,
                                        weighted_particles,
                                        transition_scale,
                                        seed=None):
    """Proposes from a unit-scale normal and reweights by the transition."""
    current = weighted_particles.particles
    proposal_dist = tfd.Normal(loc=current, scale=1.)
    transition_dist = tfd.Normal(loc=current, scale=transition_scale)
    proposed = proposal_dist.sample(seed=seed)
    updated_log_weights = (
        weighted_particles.log_weights
        + transition_dist.log_prob(proposed)
        - proposal_dist.log_prob(proposed))
    return WeightedParticles(
        particles=proposed, log_weights=updated_log_weights)

  def marginal_logprob(transition_scale):
    """Runs two SMC steps and returns the accumulated log marginal."""
    kernel = SequentialMonteCarlo(
        propose_and_update_log_weights_fn=functools.partial(
            propose_and_update_log_weights_fn,
            transition_scale=transition_scale))
    state, results = kernel.one_step(
        state=initial_state,
        kernel_results=kernel.bootstrap_results(initial_state),
        seed=first_step_seed)
    state, results = kernel.one_step(
        state=state, kernel_results=results, seed=second_step_seed)
    return results.accumulated_log_marginal_likelihood

  _, grad_lp = tfp.math.value_and_gradient(marginal_logprob, 1.5)
  self.assertIsNotNone(grad_lp)
  self.assertNotAllZero(grad_lp)
def _prepare_grid(*, times, time_step, dtype):
  """Prepares grid of times for path generation.

  Args:
    times: Rank 1 `Tensor` of increasing positive real values. The times at
      which the path points are to be evaluated.
    time_step: Rank 0 real `Tensor`. Maximal distance between points in the
      resulting grid.
    dtype: `tf.Dtype` of the input and output `Tensor`s.

  Returns:
    Tuple `(all_times, mask, time_indices)`.
    `all_times` is a 1-D real `Tensor` containing all points from `times` and
    the uniform grid of points covering `[0, times[-1])` with spacing
    `time_step`, sorted in ascending order with duplicates removed.
    `mask` is a boolean 1-D `Tensor` of the same shape as `all_times`, showing
    which elements of `all_times` correspond to the values from `times`. The
    grid starts at 0, so for strictly positive `times` the first element of
    `all_times` is 0 and `mask[0]` is False.
    `time_indices` is an integer `Tensor` of the same shape as `times` giving
    the index of each element of `times` in `all_times`.
  """
  uniform_grid = tf.range(0.0, times[-1], time_step, dtype=dtype)
  # Merge the requested times with the grid and drop duplicate points.
  merged = tf.unique(tf.concat([times, uniform_grid], axis=0)).y

  # Sort sequence. Identify the time indices of interest
  # TODO(b/169400743): use tf.sort instead of argsort and casting when XLA
  # float64 support is extended for tf.sort
  order = tf.argsort(tf.cast(merged, dtype=tf.float32))
  all_times = tf.gather(merged, order)
  time_indices = tf.searchsorted(all_times, times, out_type=tf.int32)

  # Scatter True into a dense boolean mask at the positions of `times`; these
  # are the iterations that have to be recorded.
  sparse_indices = tf.expand_dims(
      tf.cast(time_indices, dtype=tf.int64), axis=1)
  mask_sparse = tf.sparse.SparseTensor(
      indices=sparse_indices,
      values=tf.fill(times.shape, True),
      dense_shape=all_times.shape)
  mask = tf.sparse.to_dense(mask_sparse)

  return all_times, mask, time_indices
def _mode(self):
  """Returns the mode, guarded on both concentrations exceeding 1.

  With `a = concentration1` and `b = concentration0` the value computed is
  `((a - 1) / (a * b - 1)) ** (1 / a)`. With `allow_nan_stats` set, batch
  members where either concentration is not greater than 1 yield NaN;
  otherwise the mode is returned behind assertions on both concentrations.
  """
  conc1 = self.concentration1
  conc0 = self.concentration0
  mode = ((conc1 - 1) / (conc1 * conc0 - 1))**(1. / conc1)

  if not self.allow_nan_stats:
    conc1_ok = assert_util.assert_less(
        tf.ones([], dtype=self.concentration1.dtype),
        self.concentration1,
        message="Mode undefined for concentration1 <= 1.")
    conc0_ok = assert_util.assert_less(
        tf.ones([], dtype=self.concentration0.dtype),
        self.concentration0,
        message="Mode undefined for concentration0 <= 1.")
    return distribution_util.with_dependencies([conc1_ok, conc0_ok], mode)

  nan_value = dtype_util.as_numpy_dtype(self.dtype)(np.nan)
  nan = tf.fill(self.batch_shape_tensor(), nan_value, name="nan")
  is_defined = (self.concentration1 > 1.) & (self.concentration0 > 1.)
  return tf.where(is_defined, mode, nan)
def test_tril(self):
  """Samples a correlated multivariate normal with preconditioned HMC.

  Whether a custom momentum distribution is supplied is controlled by
  `self.use_default_momentum_distribution`.
  """
  use_default = self.use_default_momentum_distribution

  cov = 0.9 * tf.ones([3, 3]) + 0.1 * tf.eye(3)
  target = tfd.MultivariateNormalTriL(
      loc=[1., 2., 3.], scale_tril=tf.linalg.cholesky(cov))

  momentum_distribution = None
  if not use_default:
    # TODO(b/170015229) Don't use the covariance as inverse scale,
    # it is the wrong preconditioner.
    momentum_distribution = (
        tfde.MultivariateNormalPrecisionFactorLinearOperator(
            precision_factor=tf.linalg.LinearOperatorFullMatrix(cov)))

  kernel = tfp.experimental.mcmc.PreconditionedHamiltonianMonteCarlo(
      target_log_prob_fn=target.log_prob,
      momentum_distribution=momentum_distribution,
      step_size=0.2,
      num_leapfrog_steps=10)
  draws = tfp.mcmc.sample_chain(
      120,
      tf.zeros(3),
      kernel=kernel,
      seed=test_util.test_seed(),
      trace_fn=None)
  ess = tfp.mcmc.effective_sample_size(
      draws[-100:], filter_threshold=0, filter_beyond_positive_pairs=False)

  # TODO(b/170015229): These and other tests like it, which assert ess is
  # greater than some number, were all passing, even though the preconditioner
  # was the wrong one. Why is that? A guess is that since there are *many*
  # ways to have larger ess, these tests don't really test correctness.
  # Perhaps remove all tests like these.
  if use_default:
    self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
  else:
    self.assertAllClose(ess, tf.fill([3], 100.))
def build(self, var_list):
  """Creates the optimizer slot variables for each model variable.

  For every variable an `accumulator` slot, filled with
  `self.initial_accumulator_value`, and a `linear` slot are added. Nothing is
  created if the optimizer has already been built.

  Args:
    var_list: list of model variables to build Ftrl variables on.
  """
  super().build(var_list)
  if getattr(self, '_built', False):
    return

  self._accumulators = []
  self._linears = []
  for model_var in var_list:
    # Accumulator starts at a constant value, in the variable's shape/dtype.
    accumulator_init = tf.cast(
        tf.fill(dims=model_var.shape, value=self.initial_accumulator_value),
        dtype=model_var.dtype)
    accumulator = self.add_variable_from_reference(
        model_variable=model_var,
        variable_name='accumulator',
        initial_value=accumulator_init)
    self._accumulators.append(accumulator)

    linear = self.add_variable_from_reference(
        model_variable=model_var, variable_name='linear')
    self._linears.append(linear)

  self._built = True
def testOffsetWorksCorrectly(self):
  """Fits a probit GLM with a constant offset and checks the recovered fit."""
  num_samples = int(1e5)
  offset = tf.fill([num_samples], 1.0)

  (model_matrix, response, true_coefficients,
   true_linear_response) = self.make_dataset(
       n=num_samples, d=3, link='probit', offset=offset)

  fitted_coefficients, fitted_linear_response, is_converged, _ = (
      tfp.glm.fit(
          model_matrix,
          response,
          tfp.glm.BernoulliNormalCDF(),
          offset=offset,
          fast_unsafe_numerics=self.fast,
          maximum_iterations=20))

  # Evaluate everything in a single call.
  (fitted_coefficients_, fitted_linear_response_, is_converged_,
   true_coefficients_, true_linear_response_) = self.evaluate([
       fitted_coefficients,
       fitted_linear_response,
       is_converged,
       true_coefficients,
       true_linear_response,
   ])

  self.assertTrue(is_converged_)

  mean_response_error = np.mean(
      fitted_linear_response_ - true_linear_response_)
  self.assertNear(0., mean_response_error, err=3e-3)
  self.assertAllClose(
      true_coefficients_, fitted_coefficients_, atol=0.03, rtol=0.15)
def test_diag(self, use_default):
  """Checks that a diagonal multivariate normal is sampled effectively.

  Args:
    use_default: bool; when True the kernel's default momentum distribution
      is used, otherwise a custom one is supplied.
  """
  target = tfd.MultivariateNormalDiag(
      loc=[1., 2., 3.], scale_diag=[0.1, 1., 10.])

  momentum_distribution = None
  step_size = 0.1
  if not use_default:
    momentum_distribution = (
        _CompositeMultivariateNormalPrecisionFactorLinearOperator(
            precision_factor=target.scale))
    step_size = 1.1

  kernel = tfp.experimental.mcmc.PreconditionedNoUTurnSampler(
      target_log_prob_fn=target.log_prob,
      momentum_distribution=momentum_distribution,
      step_size=step_size,
      max_tree_depth=4)
  draws = tfp.mcmc.sample_chain(
      110,
      tf.zeros(3),
      kernel=kernel,
      seed=test_util.test_seed(),
      trace_fn=None)
  ess = tfp.mcmc.effective_sample_size(
      draws[-100:], filter_threshold=0, filter_beyond_positive_pairs=False)

  if use_default:
    self.assertLess(self.evaluate(tf.reduce_min(ess)), 100.)
  else:
    self.assertAllClose(ess, tf.fill([3], 100.))
def _log_prob(self, x):
  """Returns the log-probability of `x`, or `-inf` where no outcome matches.

  Each value is located in `self.outcomes` with a right-sided search. The
  neighbours on either side of the insertion point are compared with the value
  via `self._is_equal_or_close`; the left neighbour is preferred when it
  matches. Values that match neither neighbour get `-inf`.
  """
  x = tf.convert_to_tensor(x, name='x')

  # `searchsorted` runs on the flattened values; restore the shape of `x` and
  # clamp to the last valid outcome index.
  insertion_points = tf.searchsorted(
      self.outcomes, values=tf.reshape(x, shape=[-1]), side='right')
  right_indices = tf.minimum(
      tf.size(self.outcomes) - 1,
      tf.reshape(insertion_points, dist_util.prefer_static_shape(x)))
  use_right_indices = self._is_equal_or_close(
      x, tf.gather(self.outcomes, indices=right_indices))

  left_indices = tf.maximum(0, right_indices - 1)
  use_left_indices = self._is_equal_or_close(
      x, tf.gather(self.outcomes, indices=left_indices))

  chosen_indices = tf1.where(use_left_indices, left_indices, right_indices)
  log_probs = self._categorical.log_prob(chosen_indices)

  no_match = tf.broadcast_to(
      tf.logical_not(use_left_indices | use_right_indices),
      shape=dist_util.prefer_static_shape(log_probs))
  neg_inf = tf.fill(
      dist_util.prefer_static_shape(no_match),
      dtype_util.as_numpy_dtype(log_probs.dtype)(-np.inf))
  return tf1.where(no_match, neg_inf, log_probs)
def call(self,
         lm_output,
         sentence_output,
         lm_label_ids,
         lm_label_weights,
         sentence_labels):
  """Computes the combined loss and the metric outputs.

  The masked-label loss and the sentence loss are summed, and that sum is
  repeated along the first dimension of `sentence_labels`.

  Returns:
    A tuple `(final_loss, metric_outputs)`.
  """
  # All loss computations below are carried out in float32.
  label_weights = tf.cast(lm_label_weights, tf.float32)
  lm_predictions = tf.cast(lm_output, tf.float32)
  sentence_predictions = tf.cast(sentence_output, tf.float32)

  mask_label_loss = losses.weighted_sparse_categorical_crossentropy_loss(
      labels=lm_label_ids, predictions=lm_predictions, weights=label_weights)
  sentence_loss = losses.weighted_sparse_categorical_crossentropy_loss(
      labels=sentence_labels, predictions=sentence_predictions)
  total_loss = mask_label_loss + sentence_loss

  # TODO(hongkuny): Avoids the hack and switches add_loss.
  batch_shape = tf.slice(tf.shape(sentence_labels), [0], [1])
  final_loss = tf.fill(batch_shape, total_loss)

  # TODO(b/122840926): metrics use distribution strategy merge_call() and do
  # not work with tf.function(compile=True). Either fix this issue or move
  # metric aggregation outside the model.
  metric_outputs = self._add_metrics(
      lm_predictions,
      lm_label_ids,
      label_weights,
      mask_label_loss,
      sentence_predictions,
      sentence_labels,
      sentence_loss)
  return final_loss, metric_outputs
def one_step(self, state, kernel_results, seed=None):
  """Takes one Sequential Monte Carlo inference step.

  The step proposes new particles (except on the initial step), computes the
  incremental log marginal likelihood from the normalizing constant of the
  log weights, and then resamples those batch elements selected by
  `self.resample_criterion_fn`.

  Args:
    state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
      the current particles with (log) weights. The `log_weights` must be a
      float `Tensor` of shape `[num_particles, b1, ..., bN]`. The
      `particles` may be any structure of `Tensor`s, each of which must have
      shape `concat([log_weights.shape, event_shape])` for some
      `event_shape`, which may vary across components.
    kernel_results: instance of
      `tfp.experimental.mcmc.SequentialMonteCarloResults` representing results
      from a previous step.
    seed: Optional seed for reproducible sampling.

  Returns:
    state: instance of `tfp.experimental.mcmc.WeightedParticles` representing
      new particles with (log) weights.
    kernel_results: instance of
      `tfp.experimental.mcmc.SequentialMonteCarloResults`.
  """
  with tf.name_scope(self.name):
    with tf.name_scope('one_step'):
      seed = samplers.sanitize_seed(seed)
      # Independent seeds for the proposal and for the resampling.
      proposal_seed, resample_seed = samplers.split_seed(seed)

      state = WeightedParticles(*state)  # Canonicalize.
      num_particles = ps.size0(state.log_weights)

      # Propose new particles and update weights for this step, unless it's
      # the initial step, in which case, use the user-provided initial
      # particles and weights.
      proposed_state = self.propose_and_update_log_weights_fn(
          # Propose state[t] from state[t - 1].
          ps.maximum(0, kernel_results.steps - 1),
          state,
          seed=proposal_seed)
      is_initial_step = ps.equal(kernel_results.steps, 0)
      # TODO(davmre): this `where` assumes the state size didn't change.
      state = tf.nest.map_structure(
          lambda a, b: tf.where(is_initial_step, a, b),
          state, proposed_state)

      normalized_log_weights = tf.nn.log_softmax(state.log_weights, axis=0)
      # Every entry of `log_weights` differs from `normalized_log_weights`
      # by the same normalizing constant. We extract that constant by
      # examining an arbitrary entry.
      incremental_log_marginal_likelihood = (
          state.log_weights[0] - normalized_log_weights[0])

      do_resample = self.resample_criterion_fn(state)
      # Some batch elements may require resampling and others not, so
      # we first do the resampling for all elements, then select whether to
      # use the resampled values for each batch element according to
      # `do_resample`. If there were no batching, we might prefer to use
      # `tf.cond` to avoid the resampling computation on steps where it's not
      # needed---but we're ultimately interested in adaptive resampling
      # for statistical (not computational) purposes, so this isn't a
      # dealbreaker.
      resampled_particles, resample_indices = weighted_resampling.resample(
          state.particles, state.log_weights, self.resample_fn,
          seed=resample_seed)
      # Resampled elements get equal log weights of -log(num_particles).
      uniform_weights = tf.fill(
          ps.shape(state.log_weights),
          value=-tf.math.log(
              tf.cast(num_particles, state.log_weights.dtype)))
      # Where `do_resample` is false, keep the current particles, placeholder
      # indices from `_dummy_indices_like`, and the normalized weights.
      (resampled_particles,
       resample_indices,
       log_weights) = tf.nest.map_structure(
           lambda r, p: ps.where(do_resample, r, p),
           (resampled_particles, resample_indices, uniform_weights),
           (state.particles,
            _dummy_indices_like(resample_indices),
            normalized_log_weights))

      return (
          WeightedParticles(particles=resampled_particles,
                            log_weights=log_weights),
          SequentialMonteCarloResults(
              steps=kernel_results.steps + 1,
              parent_indices=resample_indices,
              incremental_log_marginal_likelihood=(
                  incremental_log_marginal_likelihood),
              accumulated_log_marginal_likelihood=(
                  kernel_results.accumulated_log_marginal_likelihood +
                  incremental_log_marginal_likelihood),
              seed=seed))
def _build_sub_tree(self,
                    directions,
                    integrator,
                    current_step_meta_info,
                    nsteps,
                    initial_state,
                    continue_tree,
                    not_divergence,
                    momentum_state_memory):
  """Builds a sub-tree by looping `self._loop_build_sub_tree`.

  The loop runs for at most `nsteps` iterations and stops early once no
  element of `continue_tree` is true.

  Args:
    directions: Forwarded unchanged to `self._loop_build_sub_tree`.
    integrator: Forwarded unchanged to `self._loop_build_sub_tree`.
    current_step_meta_info: Structure whose `init_energy` field supplies the
      batch shape and dtype of the loop accumulators.
    nsteps: Upper bound on the number of loop iterations.
    initial_state: Starting state; its `state`, `target`, `target_grad_parts`
      and `momentum` fields are read here.
    continue_tree: Boolean loop variable; the loop ends once it is all false.
    not_divergence: Loop variable carried through the loop body.
    momentum_state_memory: Loop variable carried through the loop body.

  Returns:
    Tuple `(candidate_tree_state, final_state, final_not_divergence,
    final_continue_tree, energy_diff_tree_sum, momentum_tree_cumsum,
    leapfrogs_taken)`.
  """
  with tf.name_scope('build_sub_tree'):
    batch_shape = prefer_static.shape(
        current_step_meta_info.init_energy)
    # We never want to select the initial state
    if MULTINOMIAL_SAMPLE:
      init_weight = tf.fill(
          batch_shape,
          tf.constant(
              -np.inf, dtype=current_step_meta_info.init_energy.dtype))
    else:
      init_weight = tf.zeros(batch_shape, dtype=TREE_COUNT_DTYPE)

    # One zero accumulator for each momentum part.
    init_momentum_cumsum = [
        tf.zeros_like(x) for x in initial_state.momentum
    ]
    # NOTE(review): `energy` is seeded from `initial_state.target` here --
    # confirm that this placeholder is intended.
    initial_state_candidate = TreeDoublingStateCandidate(
        state=initial_state.state,
        target=initial_state.target,
        target_grad_parts=initial_state.target_grad_parts,
        energy=initial_state.target,
        weight=init_weight)
    energy_diff_sum = tf.zeros_like(current_step_meta_info.init_energy,
                                    name='energy_diff_sum')
    [
        _,
        energy_diff_tree_sum,
        momentum_tree_cumsum,
        leapfrogs_taken,
        final_state,
        candidate_tree_state,
        final_continue_tree,
        final_not_divergence,
        momentum_state_memory,
    ] = tf.while_loop(
        cond=lambda iter_, energy_diff_sum, init_momentum_cumsum,  # pylint: disable=g-long-lambda
                    leapfrogs_taken, state, state_c, continue_tree,
                    not_divergence, momentum_state_memory: (
                        (iter_ < nsteps) & tf.reduce_any(continue_tree)),
        body=lambda iter_, energy_diff_sum, init_momentum_cumsum,  # pylint: disable=g-long-lambda
                    leapfrogs_taken, state, state_c, continue_tree,
                    not_divergence, momentum_state_memory: (
                        self._loop_build_sub_tree(
                            directions, integrator, current_step_meta_info,
                            iter_, energy_diff_sum, init_momentum_cumsum,
                            leapfrogs_taken, state, state_c, continue_tree,
                            not_divergence, momentum_state_memory)),
        # The order must match the parameter order of the lambdas above.
        loop_vars=(
            tf.zeros([], dtype=tf.int32, name='iter'),
            energy_diff_sum,
            init_momentum_cumsum,
            tf.zeros(batch_shape, dtype=TREE_COUNT_DTYPE),
            initial_state,
            initial_state_candidate,
            continue_tree,
            not_divergence,
            momentum_state_memory,
        ),
        parallel_iterations=self.parallel_iterations)

  return (
      candidate_tree_state,
      final_state,
      final_not_divergence,
      final_continue_tree,
      energy_diff_tree_sum,
      momentum_tree_cumsum,
      leapfrogs_taken,
  )
def calibration(*,
                prices: types.RealTensor,
                strikes: types.RealTensor,
                expiries: types.RealTensor,
                forwards: types.RealTensor,
                is_call_options: types.BoolTensor,
                beta: types.RealTensor,
                nu: types.RealTensor,
                rho: types.RealTensor,
                volatility_type: SabrImpliedVolatilityType = None,
                approximation_type: SabrApproximationType = None,
                volatility_based_calibration: bool = True,
                alpha: types.RealTensor = None,
                alpha_lower_bound: types.RealTensor = None,
                alpha_upper_bound: types.RealTensor = None,
                calibrate_beta: bool = False,
                beta_lower_bound: types.RealTensor = 0.0,
                beta_upper_bound: types.RealTensor = 1.0,
                nu_lower_bound: types.RealTensor = 0.0,
                nu_upper_bound: types.RealTensor = 1.0,
                rho_lower_bound: types.RealTensor = -1.0,
                rho_upper_bound: types.RealTensor = 1.0,
                optimizer_fn: Callable[..., types.RealTensor] = None,
                tolerance: types.RealTensor = 1e-6,
                maximum_iterations: types.RealTensor = 100,
                validate_args: bool = False,
                dtype: tf.DType = None,
                name: str = None) -> CalibrationResult:
  """Calibrates the SABR model using European option prices.

  The SABR model specifies the risk neutral dynamics of the underlying as the
  following set of stochastic differential equations:

  ```
    dF = sigma F^beta dW_1
    dsigma = nu sigma dW_2
    dW1 dW2 = rho dt

    F(0) = f
    sigma(0) = alpha
  ```
  where F(t) represents the value of the forward price as a function of time,
  and sigma(t) is the volatility.

  Given a set of European option prices, this function estimates the SABR model
  parameters which best describe the input data. Calibration is done using the
  closed-form approximations for European option pricing.

  #### Example

  ```python
  import numpy as np
  import tf_quant_finance as tff
  import tensorflow.compat.v2 as tf

  dtype = np.float64

  # Set some market conditions.
  observed_prices = np.array(
      [[20.09689284, 10.91953054, 4.25012702, 1.11561839, 0.20815853],
       [3.34813209, 6.03578711, 10.2874194, 16.26824328, 23.73850935]],
      dtype=dtype)
  strikes = np.array(
      [[80.0, 90.0, 100.0, 110.0, 120.0], [80.0, 90.0, 100.0, 110.0, 120.0]],
      dtype=dtype)
  expiries = np.array([[0.5], [1.0]], dtype=dtype)
  forwards = 100.0
  is_call_options = np.array([[True], [False]])

  # Calibrate the model.
  # In this example, we are calibrating a SABR model using the lognormal
  # volatility approximation for implied volatility, and we explicitly fix the
  # betas ourselves.
  beta = np.array([0.5, 0.5], dtype=dtype)
  models, is_converged, _ = tff.models.sabr.approximations.calibration(
      prices=observed_prices,
      strikes=strikes,
      expiries=expiries,
      forwards=forwards,
      is_call_options=is_call_options,
      beta=beta,
      calibrate_beta=False,
      nu=np.array([1.0, 1.0], dtype=dtype),
      nu_lower_bound=0.0,
      nu_upper_bound=10.0,
      rho=np.array([0.0, 0.0], dtype=dtype),
      rho_lower_bound=-0.75,
      rho_upper_bound=0.75,
      maximum_iterations=1000)

  # This will return two `SabrModel`s, where:
  # Model 1 has alpha = 1.5, beta = 0.5, volvol = 0.33, and rho = 0.1
  # Model 2 has alpha = 2.5, beta = 0.5, volvol = 0.66, and rho = -0.1
  ```

  Args:
    prices: Real `Tensor` of shape [batch_size, num_strikes] specifying the
      observed options prices. Here, `batch_size` refers to the number of SABR
      models calibrated in this invocation.
    strikes: Real `Tensor` of shape [batch_size, num_strikes] specifying the
      strike prices of the options.
    expiries: Real `Tensor` of shape compatible with [batch_size, num_strikes]
      specifying the options expiries.
    forwards: Real `Tensor` of shape compatible with [batch_size, num_strikes]
      specifying the observed forward prices/rates.
    is_call_options: Boolean `Tensor` of shape compatible with [batch_size,
      num_strikes] specifying whether or not the prices correspond to a call
      option (=True) or a put option (=False).
    beta: Real `Tensor` of shape [batch_size], specifying the initial estimate
      of the model `beta`. Values must satisfy 0 <= `beta` <= 1.
    nu: Real `Tensor` of shape [batch_size], specifying the initial estimate of
      the vol-vol parameter. Values must satisfy 0 <= `nu`.
    rho: Real `Tensor` of shape [batch_size], specifying the initial estimate of
      the correlation between the forward price and the volatility. Values must
      satisfy -1 < `rho` < 1.
    volatility_type: Either `SabrImpliedVolatilityType.NORMAL` or `LOGNORMAL`.
      Default value: `None` which maps to `LOGNORMAL`.
    approximation_type: Instance of `SabrApproximationType`.
      Default value: `None` which maps to `HAGAN`.
    volatility_based_calibration: Boolean. If `True`, then the options prices
      are first converted to implied volatilities, and the calibration is then
      performed by minimizing the difference between input implied volatilities
      and the model implied volatilities. Otherwise, the calibration is
      performed by minimizing the mean-squared-loss of the *log1p* of the input
      and estimated European options prices.
      Default value: True.
    alpha: Real `Tensor` of shape [batch_size], specifying the initial estimate
      of initial level of the volatility. Values must be strictly positive. If
      this is not provided, then an initial value will be estimated, along with
      lower and upper bounds.
      Default value: `None`, indicating that the routine should try to find a
        reasonable initial estimate.
    alpha_lower_bound: Real `Tensor` compatible with that of `alpha`, specifying
      the lower bound for the calibrated value. This is ignored if `alpha` is
      `None` and must be supplied whenever `alpha` is given.
      Default value: `None`.
    alpha_upper_bound: Real `Tensor` compatible with that of `alpha`, specifying
      the upper bound for the calibrated value. This is ignored if `alpha` is
      `None` and must be supplied whenever `alpha` is given.
      Default value: `None`.
    calibrate_beta: Boolean value indicating whether or not the `beta`
      parameters should be calibrated. If `True`, then the `beta_lower_bound`
      and `beta_upper_bound` must be specified. If `False`, then the model will
      use the values specified in `beta`.
      Default value: `False`.
    beta_lower_bound: Only used if `calibrate_beta` is True. Real `Tensor`
      compatible with that of `beta`, specifying the lower bound for the
      calibrated value.
      Default value: 0.0.
    beta_upper_bound: Only used if `calibrate_beta` is True. Real `Tensor`
      compatible with that of `beta`, specifying the upper bound for the
      calibrated value.
      Default value: 1.0.
    nu_lower_bound: Real `Tensor` compatible with that of `nu`, specifying the
      lower bound for the calibrated value.
      Default value: 0.0.
    nu_upper_bound: Real `Tensor` compatible with that of `nu`, specifying the
      upper bound for the calibrated value.
      Default value: 1.0.
    rho_lower_bound: Real `Tensor` compatible with that of `rho`, specifying the
      lower bound for the calibrated value.
      Default value: -1.0.
    rho_upper_bound: Real `Tensor` compatible with that of `rho`, specifying the
      upper bound for the calibrated value.
      Default value: 1.0.
    optimizer_fn: Optional Python callable which implements the algorithm used
      to minimize the objective function during calibration. It should have
      the following interface: result =
        optimizer_fn(value_and_gradients_function, initial_position, tolerance,
        max_iterations) `value_and_gradients_function` is a Python callable that
        accepts a point as a real `Tensor` and returns a tuple of `Tensor`s of
        real dtype containing the value of the function and its gradient at that
        point. 'initial_position' is a real `Tensor` containing the starting
        point of the optimization, 'tolerance' is a real scalar `Tensor` for
        stopping tolerance for the procedure and `max_iterations` specifies the
        maximum number of iterations.
      `optimizer_fn` should return a namedtuple containing the items: `position`
        (a tensor containing the optimal value), `converged` (a boolean
        indicating whether the optimizer converged according to the specified
        criteria), `failed` (a boolean indicating if the optimization resulted
        in a failure), `num_iterations` (the number of iterations used), and
        `objective_value` (the value of the objective function at the optimal
        value). The default value for `optimizer_fn` is None and conjugate
        gradient algorithm is used.
      Default value: `None` - indicating conjugate gradient minimizer.
    tolerance: Scalar `Tensor` of real dtype. The absolute tolerance for
      terminating the iterations.
      Default value: 1e-6.
    maximum_iterations: Scalar positive integer `Tensor`. The maximum number of
      iterations during the optimization.
      Default value: 100.
    validate_args: Boolean value indicating whether or not to validate the shape
      and values of the input arguments, at the potential expense of performance
      degradation.
      Default value: False.
    dtype: The default dtype to use when converting values to `Tensor`s.
      Default value: `None`, which means that default dtypes inferred by
        TensorFlow are used.
    name: String. The name to give to the ops created by this function.
      Default value: `None`, which maps to the default name 'sabr_calibration'.

  Returns:
    A Tuple of three elements. The first is a `CalibrationResult` holding the
    calibrated alpha, beta, volvol, and rho, where alpha[i] corresponds to the
    calibrated `alpha` of the i-th batch, etc.
    The second and third elements contain the optimization status (whether the
    optimization algorithm succeeded in finding the optimal point based on the
    specified convergence criteria) and the number of iterations performed.
    Note that the value really is a 3-tuple even though the return annotation
    only names `CalibrationResult`.

  Raises:
    ValueError: If `alpha` is supplied without both `alpha_lower_bound` and
      `alpha_upper_bound`.
  """
  if approximation_type is None:
    approximation_type = SabrApproximationType.HAGAN
  if volatility_type is None:
    volatility_type = SabrImpliedVolatilityType.LOGNORMAL

  name = name or 'sabr_calibration'
  with tf.name_scope(name):
    prices = tf.convert_to_tensor(prices, dtype=dtype, name='prices')
    # All remaining real inputs follow the dtype of `prices` unless the caller
    # requested a specific one.
    dtype = dtype or prices.dtype
    batch_size = tf.shape(prices)[0]

    strikes = tf.convert_to_tensor(strikes, dtype=dtype, name='strikes')
    expiries = tf.convert_to_tensor(expiries, dtype=dtype, name='expiries')
    # The op was previously mislabelled 'expiries' (copy-paste slip).
    forwards = tf.convert_to_tensor(forwards, dtype=dtype, name='forwards')
    is_call_options = tf.convert_to_tensor(
        is_call_options, name='is_call', dtype=tf.bool)

    if optimizer_fn is None:
      optimizer_fn = optimizer.conjugate_gradient_minimize

    # `beta` is needed as a tensor by the `alpha` initial guess below, so it is
    # converted up front; plain Python sequences would otherwise fail in
    # `1.0 - beta`.
    beta = tf.convert_to_tensor(beta, dtype=dtype)

    if alpha is None:
      # We set the initial value of alpha to be s.t. alpha * F^(beta - 1) is
      # on the order of 10%.
      initial_alpha_guess = tf.math.reduce_mean(forwards)
      alpha = tf.fill(dims=[batch_size], value=initial_alpha_guess)
      alpha = tf.pow(alpha, 1.0 - beta) * 0.1
      alpha_lower_bound = alpha * 0.1
      alpha_upper_bound = alpha * 10.0
    else:
      if alpha_lower_bound is None or alpha_upper_bound is None:
        # Fail with an actionable message instead of the opaque error that
        # `tf.convert_to_tensor(None)` would produce.
        raise ValueError(
            '`alpha_lower_bound` and `alpha_upper_bound` must be specified '
            'when `alpha` is provided.')
      alpha_lower_bound = tf.convert_to_tensor(alpha_lower_bound, dtype=dtype)
      alpha_upper_bound = tf.convert_to_tensor(alpha_upper_bound, dtype=dtype)
    alpha = _assert_parameter_valid(
        validate_args,
        alpha,
        shape=[batch_size],
        lower_bound=alpha_lower_bound,
        upper_bound=alpha_upper_bound,
        message='`alpha` is invalid!')
    initial_alpha = _to_unconstrained(alpha, alpha_lower_bound,
                                      alpha_upper_bound)

    nu_lower_bound = tf.convert_to_tensor(nu_lower_bound, dtype=dtype)
    nu_upper_bound = tf.convert_to_tensor(nu_upper_bound, dtype=dtype)
    nu = _assert_parameter_valid(
        validate_args,
        nu,
        shape=[batch_size],
        lower_bound=nu_lower_bound,
        upper_bound=nu_upper_bound,
        message='`nu` is invalid!')
    initial_nu = _to_unconstrained(nu, nu_lower_bound, nu_upper_bound)

    rho_lower_bound = tf.convert_to_tensor(rho_lower_bound, dtype=dtype)
    rho_upper_bound = tf.convert_to_tensor(rho_upper_bound, dtype=dtype)
    rho = _assert_parameter_valid(
        validate_args,
        rho,
        shape=[batch_size],
        lower_bound=rho_lower_bound,
        upper_bound=rho_upper_bound,
        message='`rho` is invalid!')
    initial_rho = _to_unconstrained(rho, rho_lower_bound, rho_upper_bound)

    beta_lower_bound = tf.convert_to_tensor(beta_lower_bound, dtype=dtype)
    beta_upper_bound = tf.convert_to_tensor(beta_upper_bound, dtype=dtype)
    beta = _assert_parameter_valid(
        validate_args,
        beta,
        shape=[batch_size],
        lower_bound=beta_lower_bound,
        upper_bound=beta_upper_bound,
        message='`beta` is invalid!')

    # The optimizer works on a single flat vector of unconstrained parameters;
    # `beta` is only part of it when it is being calibrated.
    if calibrate_beta:
      initial_beta = _to_unconstrained(beta, beta_lower_bound,
                                       beta_upper_bound)
      initial_x = tf.concat(
          [initial_alpha, initial_nu, initial_rho, initial_beta], axis=0)
    else:
      initial_x = tf.concat([initial_alpha, initial_nu, initial_rho], axis=0)

    optimizer_arg_handler = _OptimizerArgHandler(
        batch_size=batch_size,
        alpha_lower_bound=alpha_lower_bound,
        alpha_upper_bound=alpha_upper_bound,
        nu_lower_bound=nu_lower_bound,
        nu_upper_bound=nu_upper_bound,
        rho_lower_bound=rho_lower_bound,
        rho_upper_bound=rho_upper_bound,
        calibrate_beta=calibrate_beta,
        beta=beta,
        beta_lower_bound=beta_lower_bound,
        beta_upper_bound=beta_upper_bound)

    if volatility_based_calibration:
      loss_function = _get_loss_for_volatility_based_calibration(
          prices=prices,
          strikes=strikes,
          expiries=expiries,
          forwards=forwards,
          is_call_options=is_call_options,
          volatility_type=volatility_type,
          approximation_type=approximation_type,
          dtype=dtype,
          optimizer_arg_handler=optimizer_arg_handler)
    else:  # Price based calibration.
      loss_function = _get_loss_for_price_based_calibration(
          prices=prices,
          strikes=strikes,
          expiries=expiries,
          forwards=forwards,
          is_call_options=is_call_options,
          volatility_type=volatility_type,
          approximation_type=approximation_type,
          dtype=dtype,
          optimizer_arg_handler=optimizer_arg_handler)

    optimization_result = optimizer_fn(
        loss_function,
        initial_position=initial_x,
        tolerance=tolerance,
        max_iterations=maximum_iterations)

    # Map the flat optimizer position back to the individual model parameters.
    calibration_parameters = optimization_result.position
    calibrated_alpha = optimizer_arg_handler.get_alpha(calibration_parameters)
    calibrated_nu = optimizer_arg_handler.get_nu(calibration_parameters)
    calibrated_rho = optimizer_arg_handler.get_rho(calibration_parameters)
    calibrated_beta = optimizer_arg_handler.get_beta(calibration_parameters)

    return (CalibrationResult(
        alpha=calibrated_alpha,
        beta=calibrated_beta,
        volvol=calibrated_nu,
        rho=calibrated_rho), optimization_result.converged,
            optimization_result.num_iterations)
def fn(value):
  """Returns a float32 tensor of shape `new_shape` filled with `value`.

  `new_shape` is not a parameter; it is looked up from the surrounding scope
  at call time.
  """
  filled = tf.fill(dims=new_shape, value=value)
  return tf.cast(filled, tf.float32)