def prior_dlogp(vars, model, flat_view):
    """Returns the gradient of the prior on the parameters as a vector of size D x 1"""
    terms = at.concatenate([aesara.grad(var.logpt, var).flatten() for var in vars], axis=0)
    dlogp = aesara.clone_replace(terms, flat_view.replacements, strict=False)
    return dlogp

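# --- Illustrative sketch (not part of the snippets in this listing): the basic
# clone_replace pattern the functions here rely on. aesara.clone_replace rebuilds
# a graph with selected variables substituted; below, the input x is swapped for 2 * x.
import aesara
import aesara.tensor as at

x = at.scalar("x")
y = x ** 2 + 1.0
y_doubled = aesara.clone_replace(y, replace={x: 2 * x})
# aesara.function([x], y_doubled)(3.0) evaluates (2 * 3) ** 2 + 1 = 37
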
def forward_pass(self, z0):
    ret = aesara.clone_replace(self.forward, {self.root.z0: z0})
    try:
        ret.tag.test_value = np.random.normal(size=z0.tag.test_value.shape).astype(self.z0.dtype)
    except AttributeError:
        ret.tag.test_value = self.root.z0.tag.test_value
    return ret

def test_gen_cloning_with_shape_change(self, datagen):
    gen = generator(datagen)
    gen_r = at_rng().normal(size=gen.shape).T
    X = gen.dot(gen_r)
    res, _ = aesara.scan(lambda x: x.sum(), X, n_steps=X.shape[0])
    assert res.eval().shape == (50,)
    shared = aesara.shared(datagen.data.astype(gen.dtype))
    res2 = aesara.clone_replace(res, {gen: shared ** 2})
    assert res2.eval().shape == (1000,)

def test_rvs_to_value_vars_nested():
    # Test that calling rvs_to_value_vars in models with nested transformations
    # does not change the original rvs in place. See issue #5172
    with pm.Model() as m:
        one = pm.LogNormal("one", mu=0)
        two = pm.LogNormal("two", mu=at.log(one))

        # We add potentials or deterministics that are not in topological order
        pm.Potential("two_pot", two)
        pm.Potential("one_pot", one)

        before = aesara.clone_replace(m.free_RVs)

        # This call would change the model free_RVs in place in #5172
        res, _ = rvs_to_value_vars(m.potentials, apply_transforms=True)

        after = aesara.clone_replace(m.free_RVs)

        assert equal_computations(before, after)

def logp_norm(self):
    sized_symbolic_logp = self.approx.sized_symbolic_logp
    if self.use_histogram:
        sized_symbolic_logp = aesara.clone_replace(
            sized_symbolic_logp,
            dict(zip(self.approx.symbolic_randoms, self.approx.collect("histogram"))),
        )
    return sized_symbolic_logp / self.approx.symbolic_normalizing_constant

def join_nonshared_inputs(
    point: Dict[str, np.ndarray],
    xs: List[TensorVariable],
    vars: List[TensorVariable],
    shared,
    make_shared: bool = False,
):
    """
    Takes a list of Aesara Variables and joins their non-shared inputs into a single input.

    Parameters
    ----------
    point: a sample point
    xs: list of Aesara tensors
    vars: list of variables to join

    Returns
    -------
    tensors, inarray
    tensors: list of same tensors but with inarray as input
    inarray: vector of inputs
    """
    if not vars:
        raise ValueError("Empty list of variables.")

    joined = at.concatenate([var.ravel() for var in vars])

    if not make_shared:
        tensor_type = joined.type
        inarray = tensor_type("inarray")
    else:
        if point is None:
            raise ValueError("A point is required when `make_shared` is True")
        joined_values = np.concatenate([point[var.name].ravel() for var in vars])
        inarray = aesara.shared(joined_values, "inarray")

    if aesara.config.compute_test_value != "off":
        inarray.tag.test_value = joined.tag.test_value

    replace = {}
    last_idx = 0
    for var in vars:
        shape = point[var.name].shape
        arr_len = np.prod(shape, dtype=int)
        replace[var] = reshape_t(inarray[last_idx : last_idx + arr_len], shape).astype(var.dtype)
        last_idx += arr_len

    replace.update(shared)

    xs_special = [aesara.clone_replace(x, replace, rebuild_strict=False) for x in xs]
    return xs_special, inarray

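# --- Hypothetical usage sketch for the function above. The names `a`, `b`,
# `cost` and `point` are assumptions, not from the source: two free vectors are
# replaced by slices of a single flat `inarray` input, which can then be used
# as the sole input of a compiled function.
import numpy as np
import aesara
import aesara.tensor as at

a = at.vector("a")
b = at.vector("b")
cost = (a ** 2).sum() + (b ** 2).sum()

point = {"a": np.zeros(3), "b": np.zeros(2)}
[cost_flat], inarray = join_nonshared_inputs(point, [cost], [a, b], shared={})
f = aesara.function([inarray], cost_flat)
f(np.arange(5, dtype=aesara.config.floatX))  # evaluates cost on the joined vector
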
def inline_ofg_expansion(fgraph, node):
    """
    This optimization expands the internal graph of an OpFromGraph.
    Only performed if node.op.is_inline == True.
    Doing so can improve optimization at the cost of compilation speed.
    """
    op = node.op
    if not isinstance(op, OpFromGraph):
        return False
    if not op.is_inline:
        return False
    return aesara.clone_replace(
        op.local_outputs, {u: v for u, v in zip(node.op.local_inputs, node.inputs)}
    )

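# --- Illustrative sketch (not from the original source) of the kind of node the
# rewrite above expands: an OpFromGraph built with inline=True carries its own
# inner graph, which inline_ofg_expansion splices back into the outer graph
# during graph optimization.
import aesara
import aesara.tensor as at
from aesara.compile.builders import OpFromGraph

x = at.vector("x")
square_plus_one = OpFromGraph([x], [x ** 2 + 1.0], inline=True)  # sets op.is_inline

z = at.vector("z")
out = square_plus_one(z)
f = aesara.function([z], out)  # the inner graph is expanded when the graph is optimized
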
def test_rop_lop():
    mx = matrix("mx")
    mv = matrix("mv")
    v = vector("v")
    y = matrix_inverse(mx).sum(axis=0)

    yv = aesara.gradient.Rop(y, mx, mv)
    rop_f = function([mx, mv], yv)

    sy, _ = aesara.scan(
        lambda i, y, x, v: (aesara.gradient.grad(y[i], x) * v).sum(),
        sequences=aet.arange(y.shape[0]),
        non_sequences=[y, mx, mv],
    )
    scan_f = function([mx, mv], sy)

    rng = np.random.default_rng(utt.fetch_seed())
    vx = np.asarray(rng.standard_normal((4, 4)), aesara.config.floatX)
    vv = np.asarray(rng.standard_normal((4, 4)), aesara.config.floatX)

    v1 = rop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), f"ROP mismatch: {v1} {v2}"

    raised = False
    try:
        aesara.gradient.Rop(aesara.clone_replace(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(
            "Op did not raise an error even though the function is not differentiable"
        )

    vv = np.asarray(rng.uniform(size=(4,)), aesara.config.floatX)
    yv = aesara.gradient.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = aesara.gradient.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), f"LOP mismatch: {v1} {v2}"

def elemwise_dlogL(vars, model, flat_view):
    """
    Returns Jacobian of the log likelihood for each training datum wrt vars
    as a matrix of size N x D
    """
    # select one observed random variable
    obs_var = model.observed_RVs[0]
    # tensor of shape (batch_size,)
    logL = obs_var.logp_elemwiset.sum(axis=tuple(range(1, obs_var.logp_elemwiset.ndim)))
    # calculate fisher information
    terms = []
    for var in vars:
        output, _ = aesara.scan(
            lambda i, logX=logL, v=var: aesara.grad(logX[i], v).flatten(),
            sequences=[at.arange(logL.shape[0])],
        )
        terms.append(output)
    dlogL = aesara.clone_replace(
        at.concatenate(terms, axis=1), flat_view.replacements, strict=False
    )
    return dlogL

def infer_shape(self, fgraph, node, shapes):
    out_shp = infer_shape(self.local_outputs, self.local_inputs, shapes)

    # Clone the output shapes so that they are computed from the outer inputs.
    # Note:
    # Here we could do it more simply, like:
    #   ret = [aesara.clone_replace(shp, replace=repl) for shp in out_shp]
    # but doing it multiple times could duplicate common subgraphs between
    # the shape calls. The Aesara optimizer would clean this up later, but it
    # would mean extra work for the optimizer.
    repl = dict(zip(self.local_inputs, node.inputs))
    cloned = aesara.clone_replace(reduce(tuple.__add__, out_shp), replace=repl)
    ret = []
    used = 0
    for i in range(len(out_shp)):
        nb = len(out_shp[i])
        ret.append(cloned[used : used + nb])
        used += nb
    return ret

def join_nonshared_inputs(xs, vars, shared, make_shared=False):
    """
    Takes a list of Aesara Variables and joins their non-shared inputs into a single input.

    Parameters
    ----------
    xs: list of Aesara tensors
    vars: list of variables to join

    Returns
    -------
    tensors, inarray
    tensors: list of same tensors but with inarray as input
    inarray: vector of inputs
    """
    if not vars:
        raise ValueError("Empty list of variables.")

    joined = at.concatenate([var.ravel() for var in vars])

    if not make_shared:
        tensor_type = joined.type
        inarray = tensor_type("inarray")
    else:
        inarray = aesara.shared(joined.tag.test_value, "inarray")

    ordering = ArrayOrdering(vars)
    inarray.tag.test_value = joined.tag.test_value

    get_var = {var.name: var for var in vars}
    replace = {
        get_var[var]: reshape_t(inarray[slc], shp).astype(dtyp)
        for var, slc, shp, dtyp in ordering.vmap
    }

    replace.update(shared)

    xs_special = [aesara.clone_replace(x, replace, strict=False) for x in xs]
    return xs_special, inarray

def _run(self, num_features, num_timesteps, batch_size, mode):
    # determine shapes of inputs and targets depending on the batch size
    if batch_size == 1:
        inputs_size = (num_timesteps, num_features)
        targets_size = (num_timesteps, 1)
    else:
        inputs_size = (num_timesteps, batch_size, num_features)
        targets_size = (num_timesteps, batch_size, 1)

    # make inputs and targets shared variables
    inputs = aesara.shared(
        self.rng.uniform(size=inputs_size).astype(config.floatX), borrow=True
    )
    targets = aesara.shared(
        self.rng.uniform(size=targets_size).astype(config.floatX), borrow=True
    )

    # create symbolic inputs and targets variables
    if batch_size == 1:
        x = matrix("inputs")
        t = matrix("targets")
    else:
        x = tensor3("inputs")
        t = tensor3("targets")
    x.tag.test_value = inputs.get_value(borrow=True)
    t.tag.test_value = targets.get_value(borrow=True)

    # create a set of parameters for a simple RNN
    W_xh = aesara.shared(
        (0.01 * self.rng.uniform(size=(num_features, 10))).astype(config.floatX),
        borrow=True,
    )
    W_hh = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX), borrow=True
    )
    W_hy = aesara.shared(
        (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX), borrow=True
    )
    b_h = aesara.shared(np.zeros(10).astype(config.floatX), borrow=True)
    b_y = aesara.shared(np.zeros(1).astype(config.floatX), borrow=True)

    params = [W_xh, W_hh, W_hy, b_h, b_y]

    # recurrent function
    def step(x_t, h_tm1):
        h = tanh(dot(h_tm1, W_hh) + dot(x_t, W_xh) + b_h)
        return h

    # build recurrent graph
    if batch_size == 1:
        h_0 = aet.alloc(0.0, 10).astype(config.floatX)
    else:
        h_0 = aet.alloc(0.0, batch_size, 10).astype(config.floatX)
    h, updates = aesara.scan(step, sequences=[x], outputs_info=[h_0])

    # network output
    y = dot(h, W_hy) + b_y

    # Create a Gauss-Newton-Matrix object. Not really of any use here, but I
    # need it for Hessian-Free optimization.
    gn = GaussNewtonMatrix(y)

    # compute MSE
    cost = ((t - y) ** 2).sum(axis=1).mean()

    # Compute the cost at some other point in the parameter space. Not really
    # of any use here, but this is how I do it during certain iterations of CG
    # in the HF algorithm. There, it's in fact `pi + current update proposal`.
    # For simplicity, I just multiply by 2 here.
    cost_ = aesara.clone_replace(cost, replace={pi: 2 * pi for pi in params})

    # Compute the Gauss-Newton-Matrix times some vector `v`, which is `p` in CG,
    # but for simplicity, I just take the parameters vector because it's
    # already there.
    Gv = gn(v=params, cost=cost, parameters=params, damp=aet.constant(1.0))

    # compile Aesara function
    f = aesara.function([], [cost_] + Gv, givens={x: inputs, t: targets}, mode=mode)

    # execute
    f()

def forward_pass(self, z0):
    ret = aesara.clone_replace(self.forward, {self.root.z0: z0})
    return ret