def __init__(self,
             pvector: tf.Tensor,
             distribution: tfd.Distribution = tfd.Normal(loc=0., scale=1.)
             ) -> None:
    """Generate the flow for the given parameter vector.

    The parameter vector would typically be the output of a neural network.
    The constructed object is the transformed distribution (normalizing
    flow). To use it as a loss function see
    `bernstein_flow.losses.BernsteinFlowLoss`.

    :param pvector: The parameter vector.
    :type pvector: Tensor
    :param distribution: The base distribution to use.
    :type distribution: Distribution
    """
    # NOTE: `distribution` is not used in the body shown here.
    num_dist = pvector.shape[1]
    flows = []
    for d in range(num_dist):
        pv = pvector[:, d]
        flow = BernsteinFlow(pv)
        flows.append(flow)
    joint = tfd.JointDistributionSequential(flows, name='joint_bs_flows')
    super().__init__(joint, name='MultivariateBernsteinFlow')
def lossf(pars, data):
    thetasb, thetab = pars

    # Signal + background hypothesis: Poisson counts with
    # Gaussian-constrained nuisance parameters.
    nuis_sb = [tfd.Normal(loc=thetasb[i], scale=1) for i in range(Npars)]
    poises_sb = [tfd.Poisson(rate=s + b + thetasb[i]) for i in range(Npars)]
    joint_sb = tfd.JointDistributionSequential(poises_sb + nuis_sb)

    # Background-only hypothesis.
    nuis_b = [tfd.Normal(loc=thetab[i], scale=1) for i in range(Npars)]
    poises_b = [tfd.Poisson(rate=b + thetab[i]) for i in range(Npars)]
    joint_b = tfd.JointDistributionSequential(poises_b + nuis_b)

    # How log_prob broadcasting works:
    # 1. Let n = len(batch_shape) + len(event_shape). (For scalar
    #    distributions, len(event_shape) = 0.)
    # 2. If the input tensor t has fewer than n dimensions, pad its shape
    #    with size-1 dimensions on the left until it has exactly n
    #    dimensions. Call the result t'.
    # 3. Broadcast the n rightmost dimensions of t' against the
    #    [batch_shape, event_shape] of the distribution whose log_prob is
    #    being computed: dimensions that already match are left alone,
    #    singleton dimensions of t' are replicated the appropriate number of
    #    times, and anything else is an error. (For scalar distributions we
    #    only broadcast against batch_shape, since event_shape = [].)
    # 4. The resulting log_prob tensor has shape [sample_shape, batch_shape],
    #    where sample_shape is any dimensions of t to the left of the n
    #    rightmost: sample_shape = shape(t)[:-n].
    #
    # Here, e.g.
    #   joint_sb.batch_shape: (10000, 5)
    #   joint_sb.event_shape: ()
    # and we want (10000, 5) log probabilities, broadcasting 10000 samples
    # over the "5" dimension. By the rules above, the input sample tensor
    # should have shape (10000, 1), and the resulting log-probability tensor
    # has shape (10000, 5).
    qsb = -2 * joint_sb.log_prob(data)
    qb = -2 * joint_b.log_prob(data)

    total_loss = tf.math.reduce_sum(qsb) + tf.math.reduce_sum(qb)

    # First return: total loss function value.
    # Second return: 'true' parameter values (for convergence calculations).
    # Third return: extra variables whose final values you want to know at
    # the end of the optimisation.
    return total_loss, (thetasb, thetab), (qsb, qb)
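# The broadcasting rules above can be checked on a toy distribution. A
# minimal standalone sketch (not part of the loss function above); the
# shapes mirror the (10000, 5) case:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

# Scalar-event distribution with batch_shape (10000, 5): one Poisson rate
# per (pseudo-experiment, hypothesis) pair.
dist = tfd.Poisson(rate=tf.ones((10000, 5)))

# One sample per pseudo-experiment, to be broadcast over the "5" axis.
t = tf.ones((10000, 1))  # n = 2, so t needs no left-padding

lp = dist.log_prob(t)
print(lp.shape)  # (10000, 5): the singleton axis is replicated 5 times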
def logp(x, y, z):
    X = tf.constant(z)
    jds = tfd.JointDistributionSequential([
        tfd.Normal(loc=x, scale=1.),                       # m
        tfd.Normal(loc=y, scale=1.),                       # b
        lambda b, m: tfd.Normal(loc=m * X + b, scale=1.)   # Y
    ])
    return jds.log_prob([x, y, z])
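# A subtlety in JointDistributionSequential: a lambda receives the
# previously defined variables in reverse order (most recent first), which
# is why the last component above is written `lambda b, m: ...` and not
# `lambda m, b: ...`. A minimal standalone sketch (values are illustrative):
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

jds = tfd.JointDistributionSequential([
    tfd.Normal(loc=0., scale=1.),  # m
    tfd.Normal(loc=0., scale=1.),  # b
    # Arguments bind right to left: b (most recent) first, then m.
    lambda b, m: tfd.Normal(loc=m * 2.0 + b, scale=1.)  # Y
])

m, b, y = jds.sample()          # draws respect the dependency order
print(jds.log_prob([m, b, y]))  # scalar log-density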
def __call__(self, x):
    n_sample = len(x)
    joint_prob = tfd.JointDistributionSequential([
        # Latent factors: iid standard Normal rows, one per sample.
        # (`n_factor` is assumed to be defined in the enclosing scope.)
        tfd.Independent(
            tfd.Normal(
                loc=tf.zeros((n_sample, n_factor), dtype=self.dtype),
                scale=1.0),
            reinterpreted_batch_ndims=1),
        # Items: Bernoulli responses given the latent factors.
        lambda eta: tfd.Independent(
            tfd.Bernoulli(
                logits=self.intercept + eta @ tf.transpose(self.loading),
                dtype=self.dtype),
            reinterpreted_batch_ndims=1)])
    # Keep a reference to the model so its trainable variables remain
    # reachable from the returned distribution.
    joint_prob._to_track = self
    return joint_prob
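# For context, the same two-level structure can be built standalone. A
# minimal sketch with made-up dimensions (n_sample, n_factor, n_item and the
# parameter values are illustrative, not from the original class):
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

n_sample, n_factor, n_item = 100, 2, 10
intercept = tf.zeros(n_item)
loading = tf.random.normal((n_item, n_factor))

joint = tfd.JointDistributionSequential([
    # Latent factors: event_shape (n_factor,), batch_shape (n_sample,).
    tfd.Independent(
        tfd.Normal(loc=tf.zeros((n_sample, n_factor)), scale=1.0),
        reinterpreted_batch_ndims=1),
    # Item responses: event_shape (n_item,), batch_shape (n_sample,).
    lambda eta: tfd.Independent(
        tfd.Bernoulli(logits=intercept + eta @ tf.transpose(loading),
                      dtype=tf.float32),
        reinterpreted_batch_ndims=1),
])

eta_draw, y_draw = joint.sample()
print(joint.log_prob([eta_draw, y_draw]).shape)  # (n_sample,)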
def __call__(self, x):
    n_sample = len(x)
    c, d = create_cd(self.n_category, self.dtype)
    joint_prob = tfd.JointDistributionSequential([
        # Latent factors, as in the Bernoulli model above.
        tfd.Independent(
            tfd.Normal(
                loc=tf.zeros((n_sample, n_factor), dtype=self.dtype),
                scale=1.0),
            reinterpreted_batch_ndims=1),
        # Graded response model: categorical item responses given the
        # latent factors.
        lambda eta: tfd.Independent(
            tfd.Categorical(
                probs=grm_irf(eta, self.intercept, self.loading, c, d),
                dtype=self.dtype),
            reinterpreted_batch_ndims=1)])
    joint_prob._to_track = self
    return joint_prob
def gen_mixture(self, out):
    pvs = self.slice_parameter_vectors(out)
    mixtures = []
    for pv in pvs:
        logits, locs, log_scales = pv
        # Map the unconstrained parameters to positive scales.
        scales = tf.math.softplus(log_scales)
        mixtures.append(
            tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(logits=logits),
                components_distribution=tfd.Normal(loc=locs, scale=scales)))
    joint = tfd.JointDistributionSequential(mixtures, name='joint_mixtures')
    # Blockwise concatenates the scalar mixtures into a single vector-valued
    # distribution, so log_prob takes one tensor with one entry per mixture.
    blkws = tfd.Blockwise(joint)
    return blkws
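# A standalone sketch of the Blockwise step (the two mixtures and their
# parameter values are illustrative, not from the original class):
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def make_mixture(logits, locs, scales):
    return tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(logits=logits),
        components_distribution=tfd.Normal(loc=locs, scale=scales))

joint = tfd.JointDistributionSequential([
    make_mixture([0.3, 0.7], [-1., 1.], [0.5, 0.5]),
    make_mixture([0.5, 0.5], [0., 2.], [1.0, 0.3]),
], name='joint_mixtures')
blkws = tfd.Blockwise(joint)

print(blkws.event_shape)                        # [2]: one entry per mixture
print(blkws.log_prob(tf.constant([0.1, 1.5])))  # scalar log-density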
# s_in = tf.constant([5], dtype=float)  # signal counts, assumed defined upstream
s_in2 = tf.expand_dims(s_in, 0)
s = tf.broadcast_to(s_in2, shape=(N, len(s_in)))
b = tf.expand_dims(tf.constant(50, dtype=float), 0)

# Nuisance parameters (independent Gaussians)
zero = tf.expand_dims(tf.constant(0, dtype=float), 0)
nuis0 = [tfd.Normal(loc=zero, scale=1) for i in range(Npars)]

# Bunch of independent Poisson distributions that we want to combine
poises0 = [tfd.Poisson(rate=b) for i in range(Npars)]
poises0s = [tfd.Poisson(rate=s_in + b) for i in range(Npars)]

# Construct joint distributions
joint0 = tfd.JointDistributionSequential(poises0 + nuis0)
joint0s = tfd.JointDistributionSequential(poises0s + nuis0)

# Generate background-only pseudodata to be fitted
samples0 = joint0.sample(N)

# Generate signal+background pseudodata to be fitted
samples0s = joint0s.sample(N)

# We want the sample shapes to dimensionally match the versions of the
# distributions that have free parameters:
# [sample_shape, batch_shape, event_shape]
print("[sample_shape, batch_shape, event_shape]")
print("joint0.batch_shape:", joint0.batch_shape[0])
print("joint0.event_shape:", joint0.event_shape[0])
def nig(mean, scale1, alpha, beta):
    # Normal conditioned on an inverse-Gaussian mixing variable.
    # (`x`, the observed value, is assumed to be defined in the
    # enclosing scope.)
    pd = tfd.JointDistributionSequential([
        tfd.Independent(
            tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5)),
        lambda mix: tfd.Normal(loc=mean + beta * mix, scale=mix)
    ])
    return pd.log_prob([pd.sample()[0], x])
import functools

import tensorflow.compat.v2 as tf
import tensorflow_probability as tfp
from tensorflow_probability import bijectors as tfb
from tensorflow_probability import distributions as tfd
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import prefer_static as ps
from tensorflow_probability.python.internal import tensorshape_util

tf.enable_v2_behavior()

X = tf.constant(1.0)
scale1 = 1.2
alpha = 12.3
beta = 0.2
mean = 0.0
x = tf.Variable(2.2)

pd = tfd.JointDistributionSequential([
    tfd.Independent(
        tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5),
        reinterpreted_batch_ndims=0),
    lambda mix: tfd.Normal(loc=mean + beta * mix, scale=mix)
])

def _make_val_and_grad_fn(value_fn):
    @functools.wraps(value_fn)
    def val_and_grad(x):
        return tfp.math.value_and_gradient(value_fn, x)
    return val_and_grad

def nig(mean, scale1, alpha, beta):
    pd = tfd.JointDistributionSequential([
        tfd.Independent(
            tfd.InverseGaussian(scale1, (alpha**2 - beta**2)**.5)),
        lambda mix: tfd.Normal(loc=mean + beta * mix, scale=mix)
    ])
    return pd.log_prob([pd.sample()[0], x])
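# _make_val_and_grad_fn follows the decorator pattern from the TFP L-BFGS
# examples. A hedged sketch of how it would typically be wired up (the
# quadratic objective is a stand-in, not the NIG likelihood above):
import functools

import tensorflow as tf
import tensorflow_probability as tfp

def _make_val_and_grad_fn(value_fn):
    @functools.wraps(value_fn)
    def val_and_grad(x):
        return tfp.math.value_and_gradient(value_fn, x)
    return val_and_grad

@_make_val_and_grad_fn
def quadratic(x):
    # Stand-in objective; in the snippet above this would be the negative
    # NIG log-likelihood.
    return tf.reduce_sum((x - 2.0) ** 2)

result = tfp.optimizer.lbfgs_minimize(
    quadratic, initial_position=tf.constant([0.0]), tolerance=1e-8)
print(result.converged.numpy(), result.position.numpy())  # True, ~[2.0]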
def setup_and_run_hmc(threadid):
    np.random.seed(threadid)
    tf.random.set_seed(threadid)

    def sp(x):
        # softplus transform with shift
        return tf.nn.softplus(x) + 1e-4

    def local_periodic_kernel(x1):
        # Locally periodic kernel with a single free parameter. The other
        # parameters are set to encode an annual activity pattern
        # (period = 365); the RBF kernel allows for slowly varying mean
        # locations.
        k1 = tfp.math.psd_kernels.ExpSinSquared(x1, np.float64(1.0),
                                                np.float64(365.0))
        k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(
            np.float64(1.0), np.float64(1 * 365.0))
        # k1 = tfp.math.psd_kernels.ExpSinSquared(x1, np.float64(0.5), np.float64(365.0))
        # k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(np.float64(1.0), x2 * np.float64(365.0))
        # k2 = tfp.math.psd_kernels.ExponentiatedQuadratic(x2, np.float64(2 * 365.0))
        return k1 * k2

    # initial values of kernel parameters
    mparams_init = [6.0]  # mparams_init = [5.0, 4.0]
    lparams_init = [5.0]
    aparams_init = [-1.0]

    # prior distributions on parameters
    lpriors = [tfd.Normal(loc=np.float64(5.), scale=np.float64(1))]
    apriors = [tfd.Normal(loc=np.float64(-1.), scale=np.float64(1))]

    # transform for parameter to ensure positivity
    mtransforms = [sp]

    # prior distribution on the mean-kernel parameter
    mpriors = [tfd.Normal(loc=np.float64(6.), scale=np.float64(0.1))]
    # mpriors += [tfd.Normal(loc=np.float64(0.), scale=np.float64(0.1))]

    # create the model
    mover = moveNS(T, X, Z,
                   BATCH_SIZE=1000,
                   MIN_REMAIN=910,
                   mkernel=local_periodic_kernel,
                   mparams_init=mparams_init,
                   mpriors=mpriors,
                   mtransforms=mtransforms,
                   aparams_init=aparams_init,
                   apriors=apriors,
                   lparams_init=lparams_init,
                   lpriors=lpriors,
                   mean_obs_noise=0,
                   std_obs_noise=5.0)

    def build_trainable_location_scale_distribution(initial_loc,
                                                    initial_scale):
        with tf.name_scope('build_trainable_location_scale_distribution'):
            dtype = tf.float32
            initial_loc = initial_loc * tf.ones(tf.shape(initial_scale),
                                                dtype=dtype)
            initial_scale = tf.nn.softplus(initial_scale *
                                           tf.ones_like(initial_loc))
            loc = tf.Variable(initial_value=initial_loc, name='loc')
            scale = tfp.util.TransformedVariable(
                tf.Variable(initial_scale, name='scale'),
                tfp.bijectors.Softplus())
            posterior_dist = tfd.Normal(loc=loc, scale=scale)
            posterior_dist = tfd.Independent(posterior_dist)
            return posterior_dist

    # Mean-field surrogate posterior: one trainable Normal per kernel
    # parameter.
    flat_component_dists = []
    for kparam in mover.kernel_params:
        init_loc = kparam
        init_scale = tf.random.uniform(shape=kparam.shape,
                                       minval=-2, maxval=2,
                                       dtype=tf.dtypes.float32)
        flat_component_dists.append(
            build_trainable_location_scale_distribution(init_loc, init_scale))
    surrogate_posterior = tfd.JointDistributionSequential(flat_component_dists)

    def target_log_prob_fn(*inputs):
        params = [tf.squeeze(a) for a in inputs]
        return mover.log_posterior(*params)

    start = time.time()
    losses = tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn,
        surrogate_posterior,
        optimizer=tf.optimizers.Adam(learning_rate=0.1, beta_2=0.9),
        num_steps=1)  # reduced from 4000 for testing

    # Use the fitted surrogate to set initial parameter values and HMC step
    # sizes.
    steps = []
    max_step = 0.0
    for i in range(len(mover.kernel_params)):
        stdstep = surrogate_posterior.stddev()[i].numpy()
        meanp = surrogate_posterior.mean()[i].numpy()
        mover.kernel_params[i].assign(meanp)
        if stdstep.max() > max_step:
            max_step = stdstep.max()
        steps.append(stdstep)
    steps = [(1e-2 / max_step) * s for s in steps]
    steps = [1e-2 for s in steps]  # overrides the scaled steps with a constant

    start = time.time()

    # sample from the posterior
    num_samples = 2000
    burn_in = 500
    kr = mover.hmc_sample(num_samples=num_samples,
                          skip=4,
                          num_leapfrog_steps=8,
                          burn_in=burn_in,
                          init_step=steps)
    # acceptance rate
    print(np.sum(kr.inner_results.is_accepted.numpy() / num_samples))
    end = time.time()

    means_z = mover.get_mean_samples() + mean_x
    np.save('data/mean_shift_z_' + str(threadid) + '.npy', means_z)

    means = mover.get_mean_samples(X=T[::1]) + mean_x
    np.save('data/mean_shift_' + str(threadid) + '.npy', means)

    lengths = mover.get_lengthscale_samples()
    np.save('data/length_shift_' + str(threadid) + '.npy', lengths)

    amps = mover.get_amplitude_samples()
    np.save('data/amp_shift_' + str(threadid) + '.npy', amps)

    obs_noise_samples = tf.nn.softplus(mover.samples_[0]).numpy()
    np.save('data/obs_shift_' + str(threadid) + '.npy', obs_noise_samples)

    for i in range(len(mover.kernel_params)):
        output = mover.samples_[i].numpy()
        np.save('data/all_shift_' + str(i) + '_' + str(threadid) + '.npy',
                output)

    print(threadid, end - start)
cmin = -10.   # lower bound of uniform distribution on c
cmax = 10.    # upper bound of uniform distribution on c
mmu = 0.      # mean of Gaussian distribution on m
msigma = 10.  # standard deviation of Gaussian distribution on m

# convert x values and data to 32-bit floats
x = x.astype(np.float32)  # x is used globally here
data = data.astype(np.float32)

# set up the model - contains the priors and the expected linear model
model = tfd.JointDistributionSequential([
    tfd.Normal(loc=mmu, scale=msigma, name="m"),  # m prior
    tfd.Uniform(cmin, cmax, name="c"),            # c prior
    lambda c, m: tfd.Independent(
        tfd.Normal(loc=(m[..., tf.newaxis] * x + c[..., tf.newaxis]),
                   scale=sigma),
        name="data",
        reinterpreted_batch_ndims=1,
    )
])

def target_log_prob_fn(mvalue, cvalue):
    """Unnormalized target density as a function of states."""
    return model.log_prob((mvalue, cvalue, data))

Nsamples = 2000  # final number of samples
Nburn = 2000     # number of tuning samples
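# The sampler itself is not shown. A hedged sketch of one way to run the
# chain with TFP's HMC, given target_log_prob_fn, Nsamples and Nburn above
# (the step size, leapfrog steps and initial state are illustrative):
import tensorflow as tf
import tensorflow_probability as tfp

@tf.function
def run_chain():
    kernel = tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target_log_prob_fn,
        step_size=0.05,        # illustrative
        num_leapfrog_steps=5)  # illustrative
    return tfp.mcmc.sample_chain(
        num_results=Nsamples,
        num_burnin_steps=Nburn,
        current_state=[tf.zeros([]), tf.zeros([])],  # initial (m, c)
        kernel=kernel,
        trace_fn=lambda _, pkr: pkr.is_accepted)

samples, is_accepted = run_chain()
m_samples, c_samples = samples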
def setup_and_run_hmc(threadid):
    np.random.seed(threadid)
    tf.random.set_seed(threadid)

    def sp(x):
        # softplus transform with shift
        return tf.nn.softplus(x) + 1e-4

    def rbf_kernel(x1):
        # RBF kernel with a single free parameter. The other parameter is
        # set to encode a lengthscale of 20 days.
        return tfp.math.psd_kernels.ExponentiatedQuadratic(x1,
                                                           np.float64(2.0))

    # initial values of kernel parameters
    lparams_init = [0.0, 3.0]
    aparams_init = [0.0]

    # transform for parameter to ensure positivity
    transforms = [sp]

    # prior distributions on parameters
    lpriors = [tfd.Normal(loc=np.float64(0.), scale=np.float64(5.)),
               tfd.Normal(loc=np.float64(3.), scale=np.float64(1))]
    # tfd.Normal(loc=np.float64(0.), scale=np.float64(10.0))]
    apriors = [tfd.Normal(loc=np.float64(0.), scale=np.float64(5.))]

    # create the model
    mover = moveNS(T, X, Z, ID,
                   BATCH_SIZE=1460,
                   velocity=True,
                   # akernel=rbf_kernel,
                   aparams_init=aparams_init,
                   apriors=apriors,
                   # atransforms=transforms,
                   lkernel=rbf_kernel,
                   lparams_init=lparams_init,
                   lpriors=lpriors,
                   ltransforms=transforms,
                   mean_obs_noise=-5,
                   std_obs_noise=1.0)

    def build_trainable_location_scale_distribution(initial_loc,
                                                    initial_scale):
        with tf.name_scope('build_trainable_location_scale_distribution'):
            dtype = tf.float32
            initial_loc = initial_loc * tf.ones(tf.shape(initial_scale),
                                                dtype=dtype)
            initial_scale = tf.nn.softplus(initial_scale *
                                           tf.ones_like(initial_loc))
            loc = tf.Variable(initial_value=initial_loc, name='loc')
            scale = tfp.util.TransformedVariable(
                tf.Variable(initial_scale, name='scale'),
                tfp.bijectors.Softplus())
            posterior_dist = tfd.Normal(loc=loc, scale=scale)
            posterior_dist = tfd.Independent(posterior_dist)
            return posterior_dist

    # Mean-field surrogate posterior: one trainable Normal per kernel
    # parameter.
    flat_component_dists = []
    for kparam in mover.kernel_params:
        init_loc = kparam
        init_scale = tf.random.uniform(shape=kparam.shape,
                                       minval=-2, maxval=2,
                                       dtype=tf.dtypes.float32)
        flat_component_dists.append(
            build_trainable_location_scale_distribution(init_loc, init_scale))
    surrogate_posterior = tfd.JointDistributionSequential(flat_component_dists)

    def target_log_prob_fn(*inputs):
        params = [tf.squeeze(a) for a in inputs]
        return mover.log_posterior(*params)

    start = time.time()
    losses = tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn,
        surrogate_posterior,
        optimizer=tf.optimizers.Adam(learning_rate=0.1, beta_2=0.9),
        num_steps=5000)

    # Use the fitted surrogate to initialise the HMC step sizes.
    steps = []
    max_step = 0.0
    for i in range(len(mover.kernel_params)):
        stdstep = surrogate_posterior.stddev()[i].numpy()
        meanp = surrogate_posterior.mean()[i].numpy()
        mover.kernel_params[i].assign(meanp)
        if stdstep.max() > max_step:
            max_step = stdstep.max()
        steps.append(stdstep)
    steps = [(1e-2 / max_step) * s for s in steps]

    start = time.time()

    # sample from the posterior
    num_samples = 200  # 4000
    burn_in = 500
    kr = mover.hmc_sample(num_samples=num_samples,
                          skip=8,
                          burn_in=burn_in,
                          init_step=steps)
    # acceptance rate
    print(np.sum(kr.inner_results.is_accepted.numpy() / num_samples))
    end = time.time()

    lengths = mover.get_lengthscale_samples(X=pZ)
    np.save('data/length_switch_' + str(threadid) + '.npy', lengths)

    amps = mover.get_amplitude_samples()
    np.save('data/amp_switch_' + str(threadid) + '.npy', amps)

    for i in range(len(mover.kernel_params)):
        output = mover.samples_[i].numpy()
        np.save('data/all_switch_' + str(i) + '_' + str(threadid) + '.npy',
                output)

    print(threadid, end - start)