def sample_scan(params, tup, x):
    """ Perform single step update of the network """
    _, (update_W, update_U, update_b), (reset_W, reset_U, reset_b), \
        (out_W, out_U, out_b), (sm_W, sm_b) = params
    hidden = tup[3]
    logP = tup[2]
    key = tup[0]
    inp = tup[1]
    update_gate = sigmoid(
        np.dot(inp, update_W) + np.dot(hidden, update_U) + update_b)
    reset_gate = sigmoid(
        np.dot(inp, reset_W) + np.dot(hidden, reset_U) + reset_b)
    output_gate = np.tanh(
        np.dot(inp, out_W)
        + np.dot(np.multiply(reset_gate, hidden), out_U) + out_b)
    output = np.multiply(update_gate, hidden) + np.multiply(
        1 - update_gate, output_gate)
    hidden = output
    logits = np.dot(hidden, sm_W) + sm_b
    key, subkey = random.split(key)
    samples = random.categorical(
        subkey, logits, axis=1, shape=None)  # sampling the conditional
    samples = one_hot(
        samples, sm_b.shape[0])  # convert to one-hot encoded vector
    log_P_new = np.sum(samples * log_softmax(logits), axis=1)
    log_P_new = log_P_new + logP  # update the value of the logP of the sample
    return (key, samples, log_P_new, output), samples
def model(ps):
    rate, gamma, seed, scale = ps['rate'], ps['gamma'], ps['seed'], ps['scale']
    dist(rate, Normal(0, 10))
    dist(scale, Normal(0, 1))
    dist(gamma, Normal(-2, 0.1))
    dist(seed, Normal(-10, 2))
    rate = sigmoid(rate)
    gamma = sigmoid(gamma)
    seed = sigmoid(seed)
    scale = np.exp(ps['scale'])
    out = vmap(sir)(cases, rate[:, period], gamma, seed)
    I, R, caught = out[:, :, 1], out[:, :, 2], out[:, :, 3]
    dist(seropos,
         Normal((I + R)[serocountry, serodate], scale[serocountry, 0]))
    R = np.log(rate) - np.log(I[:, periodi]) - np.log(gamma)[:, None]
    gp = GaussianProcess(Matern52(0.5, 60 / plen))
    dist(R, gp.at(np.arange(0, nperiods), 1e-5))
    capacity, cChange = ps['capacity'], ps['cChange']
    dist(capacity, Normal(0, 1))
    dist(cChange, Normal(0, 5))
    capacity = sigmoid(date / 30 / cChange + capacity[:, weekday])
    dist(cases * population,
         PoissonApprox((I - caught) * population * capacity,
                       scale[:, 1, None]))
    return out, R
def predict_proba(params, teamA_rating, teamB_rating, has_tie):
    dr = (teamA_rating - teamB_rating) * params["beta"]
    gamma = nn.relu(params["gamma"]) * has_tie
    pA = jnp.clip(nn.sigmoid(dr - gamma), __EPS__, 1 - __EPS__)
    pB = jnp.clip(nn.sigmoid(-dr - gamma), __EPS__, 1 - __EPS__)
    pD = nn.relu(1.0 - pA - pB) * has_tie
    s = pA + pB + pD
    return [jnp.array(x, float) for x in [pA / s, pD / s, pB / s]]
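# A minimal usage sketch for predict_proba above. The __EPS__ value, the
# params entries, and the example ratings are illustrative assumptions; only
# the function's signature and return order come from the snippet itself.
import jax.numpy as jnp
from jax import nn

__EPS__ = 1e-6  # hypothetical clipping constant assumed by predict_proba

params = {"beta": jnp.array(0.005), "gamma": jnp.array(0.3)}
pA, pD, pB = predict_proba(params, teamA_rating=1600.0,
                           teamB_rating=1500.0, has_tie=1.0)
# pA + pD + pB == 1 after the explicit renormalization by s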
def Ls(th):
    # th = jp.stack(th).reshape(-1)
    p_1, p_2 = sigmoid(th[0]), sigmoid(th[1])
    x, y = jp.array([p_1, 1 - p_1]), jp.array([p_2, 1 - p_2])
    # print(x.shape, y.shape, payout_mat_1.shape, payout_mat_2.shape)
    L_1 = jp.dot(jp.dot(x.T, payout_mat_1), y)
    L_2 = jp.dot(jp.dot(x.T, payout_mat_2), y)
    return jp.array([L_1.reshape(-1)[0], L_2.reshape(-1)[0]])
def apply(params, inputs, **kwargs):
    # note: Python evaluates the default eagerly, so initial_state() runs
    # even when prev_state is supplied
    prev_state = kwargs.pop("prev_state", initial_state())
    W, b = params
    xh = jnp.concatenate([inputs, prev_state.h], axis=-1)
    gated = jnp.matmul(xh, W) + b
    i, f, o, g = jnp.split(gated, indices_or_sections=4, axis=-1)
    c = sigmoid(f) * prev_state.c + sigmoid(i) * jnp.tanh(g)
    h = sigmoid(o) * jnp.tanh(c)
    return h, LSTMState(h, c)
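# Hedged usage sketch for the fused-gate LSTM apply above. The shapes and the
# weight initialization are assumptions; LSTMState and initial_state are
# taken from the snippet's own enclosing scope (fields h and c are inferred
# from the prev_state.h / prev_state.c accesses).
import jax.numpy as jnp
from jax import random

batch, input_dim, hidden_dim = 2, 3, 5
W = 0.1 * random.normal(random.PRNGKey(0),
                        (input_dim + hidden_dim, 4 * hidden_dim))
b = jnp.zeros(4 * hidden_dim)
state0 = LSTMState(jnp.zeros((batch, hidden_dim)),
                   jnp.zeros((batch, hidden_dim)))
h, state = apply((W, b), jnp.ones((batch, input_dim)), prev_state=state0)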
def gru_cell(carry, x):
    def param(name):
        return parameter((x.shape[1] + carry_size, carry_size),
                         param_init, name)

    both = np.concatenate((x, carry), axis=1)
    update = sigmoid(np.dot(both, param('update_kernel')))
    reset = sigmoid(np.dot(both, param('reset_kernel')))
    both_reset_carry = np.concatenate((x, reset * carry), axis=1)
    compute = np.tanh(np.dot(both_reset_carry, param('compute_kernel')))
    out = update * compute + (1 - update) * carry
    return out, out
def __call__(self, state: Tuple[Tensor, Tensor],
             inputs: Tensor) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
    h_prev, c_prev = state
    concat_vec = np.hstack((inputs, h_prev))
    f = nn.sigmoid(np.dot(self.Wf, concat_vec) + self.bf)
    i = nn.sigmoid(np.dot(self.Wi, concat_vec) + self.bi)
    C_bar = np.tanh(np.dot(self.Wc, concat_vec) + self.bc)
    c = f * c_prev + i * C_bar
    o = nn.sigmoid(np.dot(self.Wo, concat_vec) + self.bo)
    h = o * np.tanh(c)
    # hidden state vector is copied as output
    return (h, c), h
def apply_fun_scan(params, hidden, inp):
    """ Perform single timestep update of the network. """
    _, (update_W, update_U, update_b), (reset_W, reset_U, reset_b), (
        out_W, out_U, out_b) = params
    update_gate = sigmoid(np.dot(inp, update_W) + np.dot(hidden, update_U)
                          + update_b)
    reset_gate = sigmoid(np.dot(inp, reset_W) + np.dot(hidden, reset_U)
                         + reset_b)
    output_gate = np.tanh(np.dot(inp, out_W)
                          + np.dot(np.multiply(reset_gate, hidden), out_U)
                          + out_b)
    output = np.multiply(update_gate, hidden) + np.multiply(
        1 - update_gate, output_gate)
    hidden = output
    return hidden, hidden
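# Hedged sketch: unrolling the single-step GRU above over a whole sequence
# with jax.lax.scan. The time-major shape convention and the h0 argument are
# assumptions for illustration, not part of the original snippet.
from jax import lax

def gru_rnn(params, h0, inputs):
    # inputs: (seq_len, batch, input_dim); h0: (batch, hidden_dim)
    step = lambda hidden, inp: apply_fun_scan(params, hidden, inp)
    final_hidden, hiddens = lax.scan(step, h0, inputs)
    return final_hidden, hiddens  # hiddens: (seq_len, batch, hidden_dim)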
def apply_fun(params, inputs, **kwargs):
    # convex gating: a * f(x) + (1 - a) * x with a = sigmoid(params)
    (fx, logdet), (x, _) = inputs
    gate = sigmoid(params)
    out = gate * fx + (1 - gate) * x
    # d out / d x = gate * exp(logdet) + (1 - gate); taking logs gives
    # log(1 + e^(params + logdet)) - log(1 + e^(params)), i.e.:
    logdet = softplus(logdet + params) - softplus(params)
    return out, logdet
def GRU_forward(GRU_params, hidden, t, X):
    inp = jnp.concatenate((t, X), 0)  # M x D+1
    (update_W, update_U, update_b), (reset_W, reset_U, reset_b), \
        (out_W, out_U, out_b) = GRU_params
    reset_gate = sigmoid(
        jnp.dot(inp, reset_W) + jnp.dot(hidden, reset_U) + reset_b)
    update_gate = sigmoid(
        jnp.dot(inp, update_W) + jnp.dot(hidden, update_U) + update_b)
    output_gate = jnp.tanh(
        jnp.dot(inp, out_W)
        + jnp.dot(jnp.multiply(reset_gate, hidden), out_U) + out_b)
    output = jnp.multiply(update_gate, hidden) + jnp.multiply(
        1 - update_gate, output_gate)
    return output
def apply_fun(params, input, adj, activation=nn.relu, **kwargs):
    rng = kwargs.pop('rng', None)
    is_training = kwargs.pop('is_training', None)
    first_x, x = input  # we need the first input for 'raw' infusion
    W_t, b_t, Theta, W_h, W_x = params
    x = drop_fun(None, x, is_training=is_training, rng=rng)
    # compute gate
    gate = nn.sigmoid(np.dot(x, W_t) + b_t)
    F_hom = np.dot(x, Theta)
    if infusion == 'inner':
        F_het = F_hom
    elif infusion == 'outer':
        F_het = np.dot(x, W_h) if x.shape[-1] != W_h.shape[-1] else x
    elif infusion == 'raw':
        F_het = np.dot(first_x, W_x)
    # k-hop convolution: adj is adj^k without self connections
    F_hom = matmul(adj, F_hom, F_hom.shape[0])
    F_hom = activation(F_hom)
    out = gate * F_hom + (1 - gate) * F_het
    return out
def infer_bottom_half(params, qz_params, train_image, seed=412):
    """
    Args:
        params: decoder
        qz_params: variational optimized posterior params
        train_image: single digit trained on
    Plots original whole image beside inferred greyscale.
    """
    key = random.PRNGKey(seed)
    key, subkey = random.split(key)
    # sample z ~ approximate posterior q, feed it to decoder to find
    # Bernoulli means of p(bottom half of image | x).
    z = sample_diag_gaussian(*unpack_gaussian_params(qz_params), subkey)
    x = sigmoid(decoder(z, params['dec']))
    image_ = onp.zeros((28, 28))
    image_[:14, :] = train_image.reshape([28, 28])[:14, :]  # original top half
    image_[14:, :] = x.reshape([28, 28])[14:, :]  # inferred bottom half
    plt_im = onp.zeros((28, 28 * 2))
    plt_im[:, :28] = image_
    plt_im[:, 28:] = train_image.reshape([28, 28])
    fig, ax = plt.subplots()
    plt.imshow(plt_im, cmap=plt.cm.binary)
    plt.axis('off')
    plt.savefig('frankenstein_bottom_to_top.png', bbox_inches='tight')
def logprob_from_conditional_params(images, means, inv_scales, logit_probs):
    images = np.expand_dims(images, 1)
    cdf = lambda offset: sigmoid((images - means + offset) * inv_scales)
    upper_cdf = np.where(images == 1, 1, cdf(1 / 255))
    lower_cdf = np.where(images == -1, 0, cdf(-1 / 255))
    all_logprobs = np.sum(
        np.log(np.maximum(upper_cdf - lower_cdf, 1e-12)), -1)
    log_mix_coeffs = logit_probs - logsumexp(logit_probs, -3, keepdims=True)
    return np.sum(logsumexp(log_mix_coeffs + all_logprobs, axis=-3),
                  axis=(-2, -1))
def act(rngkey, theta):
    """ returns: action, one of 1 (push) or -1 (don't push) """
    prob_push = nn.sigmoid(theta)
    probs = jnp.array([prob_push, 1 - prob_push])
    return random.choice(rngkey, ACTION_SPACE, p=probs)
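# Illustrative call for act(...); ACTION_SPACE is assumed to be
# jnp.array([1, -1]) so the probability ordering matches the docstring.
a = act(random.PRNGKey(0), theta=0.0)  # 1 or -1, each with probability 0.5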
def _lstm_cell(state, weights: LSTM_WEIGHTS, input):
    h, c = state
    i = sigmoid(jnp.matmul(input, weights.w_ii)
                + jnp.matmul(h, weights.w_hi) + weights.bi)
    f = sigmoid(jnp.matmul(input, weights.w_if)
                + jnp.matmul(h, weights.w_hf) + weights.bf)
    o = sigmoid(jnp.matmul(input, weights.w_io)
                + jnp.matmul(h, weights.w_ho) + weights.bo)
    # cell candidate: the recurrent term uses the hidden state h (the
    # original multiplied w_hg by the cell state c, which mismatches the
    # standard LSTM equations and the w_h* naming of the other gates)
    g = tanh(jnp.matmul(input, weights.w_ig)
             + jnp.matmul(h, weights.w_hg) + weights.bg)
    c = f * c + i * g
    h = o * tanh(c)
    return jnp.stack((h, c)), h
def discretized_mix_logistic_loss(theta, y, num_class=256, log_scale_min=-7.):
    """
    Discretized mixture of logistic distributions loss
    :param theta: B x T x 3 * nr_mix
    :param y: B x T x 1
    """
    theta_shape = theta.shape
    nr_mix = theta_shape[2] // 3
    # unpack parameters
    means = theta[:, :, :nr_mix]
    log_scales = np.maximum(theta[:, :, nr_mix:2 * nr_mix], log_scale_min)
    logit_probs = theta[:, :, nr_mix * 2:nr_mix * 3]
    # B x T x 1 => B x T x nr_mix
    y = np.broadcast_to(y, y.shape[:-1] + (nr_mix,))
    centered_y = y - means
    inv_stdv = np.exp(-log_scales)
    plus_in = inv_stdv * (centered_y + 1. / (num_class - 1))
    cdf_plus = sigmoid(plus_in)
    min_in = inv_stdv * (centered_y - 1. / (num_class - 1))
    cdf_min = sigmoid(min_in)
    # log probability for edge case of 0 (before scaling):
    log_cdf_plus = plus_in - softplus(plus_in)
    # log probability for edge case of 255 (before scaling):
    log_one_minus_cdf_min = -softplus(min_in)
    cdf_delta = cdf_plus - cdf_min  # probability for all other cases
    mid_in = inv_stdv * centered_y
    log_pdf_mid = mid_in - log_scales - 2. * softplus(mid_in)
    log_probs = np.where(
        y < -0.999, log_cdf_plus,
        np.where(
            y > 0.999, log_one_minus_cdf_min,
            np.where(cdf_delta > 1e-5,
                     np.log(np.maximum(cdf_delta, 1e-12)),
                     log_pdf_mid - np.log((num_class - 1) / 2))))
    log_probs = log_probs + log_softmax(logit_probs)
    return -np.sum(logsumexp(log_probs, axis=-1), axis=-1)
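# Hedged usage sketch for discretized_mix_logistic_loss above, assuming the
# snippet's `np` is jax.numpy; the batch size, sequence length, mixture
# count, and target values are illustrative assumptions.
from jax import random
import jax.numpy as np

B, T, nr_mix = 2, 8, 10
theta = random.normal(random.PRNGKey(0), (B, T, 3 * nr_mix))
y = np.zeros((B, T, 1))  # targets assumed scaled to [-1, 1]
loss = discretized_mix_logistic_loss(theta, y)  # shape (B,)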
def apply_fun_scan(params, hidden_cell, inp):
    """ Perform single timestep update of the network. """
    _, (forget_W, forget_U, forget_b), (in_W, in_U, in_b), (
        out_W, out_U, out_b), (change_W, change_U, change_b) = params
    hidden, cell = hidden_cell
    input_gate = sigmoid(np.dot(inp, in_W) + np.dot(hidden, in_U) + in_b)
    change_gate = np.tanh(np.dot(inp, change_W) + np.dot(hidden, change_U)
                          + change_b)
    forget_gate = sigmoid(np.dot(inp, forget_W) + np.dot(hidden, forget_U)
                          + forget_b)
    cell = np.multiply(change_gate, input_gate) + np.multiply(cell,
                                                              forget_gate)
    output_gate = sigmoid(np.dot(inp, out_W) + np.dot(hidden, out_U) + out_b)
    output = np.multiply(output_gate, np.tanh(cell))
    # carry the *updated* hidden state forward (the original returned the
    # stale `hidden`, so `output` was computed but never propagated)
    hidden_cell = (output, cell)
    return hidden_cell, hidden_cell
def gated_resnet(inputs, aux=None):
    chan = inputs.shape[-1]
    c1 = Conv(chan)(nonlinearity(inputs))
    if aux is not None:
        c1 = c1 + NIN(chan)(nonlinearity(aux))
    c1 = nonlinearity(c1)
    if dropout_p > 0:
        c1 = Dropout(rate=dropout_p)(c1)
    c2 = Conv(2 * chan, init_scale=0.1)(c1)
    a, b = np.split(c2, 2, axis=-1)
    c3 = a * sigmoid(b)  # GLU-style gating
    return inputs + c3
def generate_from_prior(gen_params, num_samples, noise_dim,
                        key=random.PRNGKey(2)):
    """
    Args:
        gen_params: decoder parameters
        num_samples: number of latent variable samples
    Returns:
        Fake data: Bernoulli means p(x|z)
    """
    latents = random.normal(key, (num_samples, noise_dim))
    return sigmoid(neural_net_predict(gen_params, latents))
def apply_fun_scan(params, tup, inp):
    """ Perform single step update of the network """
    _, (update_W, update_U, update_b), (reset_W, reset_U, reset_b), \
        (out_W, out_U, out_b), (sm_W, sm_b) = params
    hidden = tup[0]
    logP = tup[1]
    update_gate = sigmoid(
        np.dot(inp, update_W) + np.dot(hidden, update_U) + update_b)
    reset_gate = sigmoid(
        np.dot(inp, reset_W) + np.dot(hidden, reset_U) + reset_b)
    output_gate = np.tanh(
        np.dot(inp, out_W)
        + np.dot(np.multiply(reset_gate, hidden), out_U) + out_b)
    output = np.multiply(update_gate, hidden) + np.multiply(
        1 - update_gate, output_gate)
    hidden = output
    logP = log_softmax(np.dot(hidden, sm_W) + sm_b)
    return (hidden, logP), (hidden, logP)
def sample(p, temperature, key, num_samples=1):
    """
    Generate Binomial Concrete samples
    :param p: Binomial Concrete params (interpreted as Bernoulli
        probabilities) (jax.numpy array)
    :param temperature: temperature parameter
    :param key: PRNG key
    :param num_samples: number of samples
    """
    tol = 1e-7
    p = np.clip(p, tol, 1 - tol)
    logit_p = logit(p)
    base_randomness = random.logistic(key, shape=(num_samples, *p.shape))
    return nn.sigmoid((logit_p + base_randomness) / (temperature + tol))
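# Minimal usage sketch for sample(...) above, assuming the snippet's `np` is
# jax.numpy and `random` is jax.random; the probabilities and temperature
# are illustrative values, not taken from the source.
key = random.PRNGKey(0)
p = np.array([0.1, 0.5, 0.9])
draws = sample(p, temperature=0.5, key=key, num_samples=4)
# draws: shape (4, 3), entries in (0, 1), concentrating near {0, 1} as the
# temperature approaches 0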
def Ls(th):
    p_1_0 = sigmoid(th[0][0:1])
    p_2_0 = sigmoid(th[1][0:1])
    p = jp.stack([
        p_1_0 * p_2_0,
        p_1_0 * (1 - p_2_0),
        (1 - p_1_0) * p_2_0,
        (1 - p_1_0) * (1 - p_2_0)
    ], axis=1)
    # print('p', p, p.shape)
    p_1 = jp.reshape(sigmoid(th[0][1:5]), (4, 1))
    p_2 = jp.reshape(sigmoid(th[1][1:5]), (4, 1))
    P = jp.stack([
        p_1 * p_2,
        p_1 * (1 - p_2),
        (1 - p_1) * p_2,
        (1 - p_1) * (1 - p_2)
    ], axis=1).reshape((4, 4))
    # print('P', P, P.shape)
    # print('inv', jsp.linalg.inv(jp.eye(4) - gamma * P),
    #       jsp.linalg.inv(jp.eye(4) - gamma * P).shape)
    M = -jp.dot(p, jsp.linalg.inv(jp.eye(4) - gamma * P))
    # print('M', M)
    L_1 = jp.dot(M, jp.reshape(payout_mat_1, (4, 1)))
    L_2 = jp.dot(M, jp.reshape(payout_mat_2, (4, 1)))
    # print('L_1', L_1.reshape(-1)[0])
    # print('L_2', L_2.reshape(-1)[0])
    return jp.array([L_1.reshape(-1)[0], L_2.reshape(-1)[0]])
def _predict(self, params, base_preds, context, return_probs, target=None):
    # Base logits
    base_preds = jnp.clip(base_preds, a_min=self.pred_clipping,
                          a_max=(1.0 - self.pred_clipping))
    logits = jsp.special.logit(base_preds)
    logits = jnp.expand_dims(logits, axis=1)
    if self.num_classes == 2:
        logits = jnp.tile(logits, reps=(1, 1, 1))
    else:
        logits = jnp.tile(logits, reps=(1, self.num_classes, 1))

    # Turn target class into one-hot
    if target is not None:
        target = jnn.one_hot(target, num_classes=self.num_classes)
        if self.num_classes == 2:
            target = target[:, 1:]

    # Layers
    if target is None:
        for n, layer in enumerate(self.layers):
            logits = layer.predict(params=params[f'layer{n}'],
                                   logits=logits, context=context)
    else:
        for n, layer in enumerate(self.layers):
            params[f'layer{n}'], logits = layer.predict(
                params=params[f'layer{n}'], logits=logits,
                context=context, target=target)

    logits = jnp.squeeze(logits, axis=-1)
    if self.num_classes == 2:
        logits = jnp.squeeze(logits, axis=1)

    # Output prediction
    if return_probs:
        prediction = jnn.sigmoid(logits)
    elif self.num_classes == 2:
        prediction = logits > 0.0
    else:
        prediction = jnp.argmax(logits, axis=1)

    if target is None:
        return prediction
    else:
        return params, prediction
def lin_interpolate(params, train_images, train_labels, examples):
    """
    Args:
        params: List[Tuple(W, b)] for each layer in NN
        train_images, train_labels = (10k, 784), (10k, 10)
        examples: List[Tuple[digit 1, digit 2]] samples to interpolate
    Examines a latent variable model with continuous latent variables by
    linearly interpolating between latent reps (mean vecs of encodings)
    of two points.
    """
    def interpolate(za, zb, alpha):
        """Linear interpolation z_alpha = alpha * z_a + (1 - alpha) * z_b"""
        z_alpha = alpha * za + (1 - alpha) * zb
        return z_alpha

    # sample 3 pairs of images, each having a different class
    labels_to_images = defaultdict(list)
    # encode data and get mean vectors
    labels = np.argmax(train_labels, axis=-1)
    # linearly interpolate between mean vectors
    for im, lab in tqdm(zip(train_images, labels)):
        labels_to_images[lab].append(im)
    print("labels to images", labels_to_images.keys())
    # plot Bernoulli means p(x|z_alpha) at 10 equally spaced points
    image_ = onp.zeros([3 * 28, 10 * 28])
    # plot generative distribution along linear interpolation
    for row, pair in enumerate(examples):
        images = [labels_to_images[pair[0]][0], labels_to_images[pair[1]][0]]
        images = np.stack(images)
        mus, log_sigmas = vmap(encoder, in_axes=(0, None))(images,
                                                           params['enc'])
        alphas = np.linspace(0, 1, 10)[::-1]
        interpolated_means = [interpolate(mus[0], mus[1], a) for a in alphas]
        interpolated_means = np.stack(interpolated_means)
        bern_mus = sigmoid(
            vmap(decoder, in_axes=(0, None))(interpolated_means,
                                             params['dec']))
        bern_ims = bern_mus.reshape([-1, 28, 28])
        print("bern ims", bern_ims.shape)
        for col in range(10):
            image_[row * 28:(row + 1) * 28,
                   col * 28:(col + 1) * 28] = bern_ims[col, ...]
    fig, ax = plt.subplots()
    plt.imshow(image_, cmap=plt.cm.binary)
    plt.axis('off')
    plt.savefig('interpolated_means.png', bbox_inches='tight')
def clipped_sigmoid(x):
    """Customized sigmoid function that avoids overflow.

    Parameters
    ----------
    x : ndarray

    Returns
    -------
    x : ndarray
    """
    # x is clipped because nn.sigmoid can overflow and return nan;
    # this restricts the sigmoid's output range to [1e-15, 1 - 1e-15]
    sigmoid_range = 34.538776394910684
    x = jnp.clip(x, -sigmoid_range, sigmoid_range)
    x = nn.sigmoid(x)
    return x
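# Quick check of clipped_sigmoid on extreme inputs (illustrative values):
# the clip at +/-34.5388 keeps the output inside [1e-15, 1 - 1e-15] rather
# than letting it saturate to exactly 0 or 1.
clipped_sigmoid(jnp.array([-1e9, 0.0, 1e9]))  # ~ [1e-15, 0.5, 1 - 1e-15]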
def conditional_sample(p, y, temperature, key):
    """
    Generate conditional Binomial Concrete sample
    :param p: Binomial Concrete params (interpreted as Bernoulli
        probabilities) (jax.numpy array)
    :param y: Conditioning parameters (jax.numpy array)
    :param temperature: temperature parameter
    :param key: PRNG key
    """
    tol = 1e-7
    p = np.clip(p, tol, 1 - tol)
    v = random.uniform(key, shape=y.shape)
    v_prime = (v * p + (1 - p)) * y + (v * (1 - p)) * (1 - y)
    v_prime = np.clip(v_prime, tol, 1 - tol)
    logit_v = logit(v_prime)
    logit_p = logit(p)
    return nn.sigmoid((logit_p + logit_v) / (temperature + tol))
def apply_fn(params, inputs, **kwargs):
    """Applies layer.

    Args:
        params: Layer parameters, (eta,).
        inputs: Float numpy array with shape
            (batch_size, num_grids, num_in_channels).
        **kwargs: Other keyword arguments. Unused.

    Returns:
        Float numpy array with shape (batch_size, num_grids, num_channels).
    """
    del kwargs
    eta, = params
    # shape (num_grids, num_grids, num_channels)
    kernels = _exponential_function_channels(
        displacements, widths=minval + (maxval - minval) * nn.sigmoid(eta))
    # shape (batch_size, num_grids, num_channels)
    return jnp.squeeze(
        # shape (batch_size, 1, num_grids, num_channels)
        jnp.tensordot(inputs, kernels, axes=(1, 0)) * dx,
        axis=1)
def run_and_plot():
    key = random.PRNGKey(0)
    eta = .01
    theta = 3.
    a = 1.
    baseline = 0
    print("Running...")
    data = {}
    for _ in range(10000):
        key, subkey = random.split(key)
        a, r = run_episode(subkey, theta, a)
        policy_grad = single_ep_policy_grad(theta, a, r - baseline)
        baseline = update_baseline(baseline, r)
        aux = {
            "action": a,
            "reward": r,
            "theta": theta,
            "grad": policy_grad,
        }
        theta = theta + eta * policy_grad
        append_to_log(data, aux)

    # plotting
    fig, axs = plt.subplots(2, 1, figsize=[8, 8])
    axs = axs.flatten()
    axs[0].plot(data['theta'], label='theta')
    axs[1].plot(nn.sigmoid(data['theta']), label='prob(push)')
    for ax in axs:
        ax.legend()
    plt.show()
    return
def map_to_minusone_one(x):
    # 2 * sigmoid(x) - 1 == tanh(x / 2): maps the reals onto (-1, 1)
    return 2 * sigmoid(x) - 1
'cChange': np.zeros([ncountries, 1]) + 1.5,
'gamma': np.zeros(ncountries) - 2.0,  # recovery
'scale': np.zeros([ncountries, 2]),
'seed': np.zeros(ncountries) - 10}

poirot.loglikelihood(model, ps)
state = InferenceState(ps)
state = infer(model, state)
poirot.lls.pop()

i = countries.index("United Kingdom")
ps = state.mean()
out, R = model(ps)
np.exp(ps['seed']) * population[i]
1 / sigmoid(ps['gamma'])
np.exp(ps['scale'])

plt.bar(dates, cases[i, :])
plotR(i)
plt.plot([dates[i] for i in periodi], np.exp(R)[i, :])
plt.plot(dates, sigmoid(ps['rate'])[i, period])
plt.plot(dates, out[i, :, 0])  # S
plt.plot(dates, out[i, :, 1])  # I
plt.plot(dates, out[i, :, 2])  # R
plt.plot(dates, out[i, :, 3])  # caught
plt.plot(dates, sigmoid((date / 30) / ps['cChange'][0]
                        + ps['capacity'][0, weekday]))