def evaluate(self, values, use_pfun=True):
  """ Evaluates value(s) belonging to the domain of the variable.

  :param values: None, set of a single integer, array, or scalar.
  :param use_pfun: boolean flag to make use of pfun if previously set.

  :return: a NumPy array of the values (see Variable.evaluate()):
  """
  # Inverse-CDF (pfun) evaluation only applies when a pfun has been set and
  # values is a unit set of an integer (i.e. a sample-count specification).
  use_pfun = use_pfun and self._pfun is not None and isunitsetint(values)

  # If not using pfun while random sampling, we use sfun for booleans
  if not use_pfun:
    if self._vtype in VTYPES[bool] and hasattr(self._sfun, 'expr'):
      if isunitsetint(values) and self._sfun.expr == bernoulli_sfun:
        number = list(values)[0]
        # A zero or negative count denotes random sampling: negate a
        # negative count to obtain the positive sample count for the
        # Bernoulli sampler (zero is passed through unchanged).
        if not number or number < 0:
          number = number if not number else -number
          return Distribution(self._name,
                              {self.name: self._sfun[None](number)})
    # Fall back to the parent class evaluation otherwise
    return super().evaluate(values)

  # Evaluate values from inverse cdf bounded within cdf limits
  number = list(values)[0]
  assert self.isfinite, \
      "Cannot evaluate {} values for bounds: {}".format(values, self._ulims)
  lims = self.pfun[0](self._ulims)
  # uniform() spreads (or randomly samples, for negative counts) within the
  # CDF limits; the tuple checks flag open-ended bounds at either limit.
  values = uniform(
      lims[0], lims[1], number,
      isinstance(self._vset[0], tuple),
      isinstance(self._vset[1], tuple)
  )
  # Map the uniform CDF samples back through the inverse CDF
  return Distribution(self._name, {self.name: self.pfun[1](values)})
def interp(self, *args, cond_pdf=False):
  """ Interpolates values for one variable conditioned on the others.

  Exactly one positional argument must be a unit set of an integer (the
  number of points to evaluate); the remaining arguments are known values,
  given in order of the means, whose offsets from the corresponding means
  condition the Gaussian for the interpolated variable.

  :param args: values in order of mean - one must be a unitsetint.
  :param cond_pdf: if True, also return the conditional PDF at the values.

  :return: interpolated values (and their conditional PDF if cond_pdf).

  :raises ValueError: if more than one argument is a unit-set integer.
  """
  # Locate the single argument to interpolate; collect mean offsets of rest
  idx = None
  dmu = np.empty((self._n, 1), dtype=float)
  for i, arg in enumerate(args):
    if isunitsetint(arg):
      if idx is None:
        idx = i
      else:
        raise ValueError(
            "Only one argument can be interpolated at a time")
    else:
      dmu[i] = arg - self._mean[i]
  assert idx is not None, "No variable specified for interpolation"

  # Condition the Gaussian for variable idx on the remaining offsets;
  # dmu[idx] was never written, so remove it before applying the
  # conditioning coefficients.
  dmu = np.delete(dmu, idx, axis=0)
  lims = self._cdfs[idx]
  number = list(args[idx])[0]
  # Spread (or sample) CDF points within the variable's CDF limits
  cdf = uniform(lims[0], lims[1], number)
  mean = self._mean[idx] + float(self._coef[idx].dot(dmu))
  stdv = self._stdv[idx]
  # Inverse-CDF transform to obtain the interpolated values
  vals = scipy.stats.norm.ppf(cdf, loc=mean, scale=stdv)
  if not cond_pdf:
    return vals
  return vals, scipy.stats.norm.pdf(vals, loc=mean, scale=stdv)
def eval_step(self, pred_vals, succ_vals, reverse=False): """ Returns adjusted succ_vals """ # Evaluate deltas if required if succ_vals is None: if self._delta is None: pred_values = list(pred_vals.values()) if all([isscalar(pred_value) for pred_value in pred_values]): raise ValueError( "Stochastic step sampling not supported for Field; use RF" ) else: succ_vals = pred_vals else: succ_vals = self.eval_delta() elif isinstance(succ_vals, Expression) or \ isinstance(succ_vals, (tuple, self._delta_type)): succ_vals = self.eval_delta(succ_vals) # Apply deltas cond = None if isinstance(succ_vals, self._delta_type): succ_vals = self.apply_delta(pred_vals, succ_vals) elif isunitsetint(succ_vals): raise ValueError( "Stochastic step sampling not supported for Field; use RF") # Initialise outputs with predecessor values dims = {} kwargs = {'reverse': reverse} if cond is not None: kwargs = {'cond': cond} vals = collections.OrderedDict() for key in self._keylist: vals.update({key: pred_vals[key]}) if succ_vals is None: return vals, dims, kwargs # If stepping, add successor values for key in self._keylist: mod_key = key + "'" succ_key = key if mod_key not in succ_vals else mod_key vals.update({key + "'": succ_vals[succ_key]}) return vals, dims, kwargs
def parse_args(self, *args, **kwds):
  """ Parses *args and **kwds into an ordered dictionary of values keyed by
  variable name.

  Comma-joined keys (e.g. 'x,y') are split and their value broadcast or
  length-checked accordingly. Unspecified variables receive a default value
  ({0} if any variable is float-typed and no value is already {0}, else
  None).

  :param args: positional specifications forwarded to parse_as_str_dict().
  :param kwds: keyword specifications; the reserved keyword 'pass_all'
      (default False) skips key validation and strips unknown keys at the
      end instead.

  :return: dictionary of values keyed by variable name.
  """
  pass_all = False if 'pass_all' not in kwds else kwds.pop('pass_all')
  values = parse_as_str_dict(*args, **kwds)
  seen_keys = []
  for key, val in values.items():
    count_comma = key.count(',')
    if count_comma:
      # Comma-joined key: register each component key and check/broadcast
      # the associated value to match the number of components
      seen_keys.extend(key.split(','))
      if isinstance(val, (tuple, list)):
        assert len(val) == count_comma+1, \
            "Mismatch in key specification {} and number of values {}".\
            format(key, len(val))
      else:
        values.update({key: [val] * (count_comma + 1)})
    else:
      seen_keys.append(key)
      if not pass_all:
        assert seen_keys[-1] in self._keyset, \
            "Unrecognised key {} among available Variables {}".format(
                seen_keys[-1], self._keyset)

  # Default values: float-typed fields default unspecified keys to {0},
  # unless some specified value is already the unit set {0} (in which case
  # the default reverts to None). The loop is a no-op for empty values, so
  # no special-casing of len(values) == 0 is needed.
  def_val = None
  if self._anyfloat:
    def_val = {0}
    for val in values.values():
      if isunitsetint(val) and val == {0}:
        def_val = None
  for key in self._keylist:
    if key not in seen_keys:
      values.update({key: def_val})

  # With pass_all, unknown keys are tolerated above but stripped here
  if pass_all:
    list_keys = list(values.keys())
    for key in list_keys:
      if key not in self._keylist:
        values.pop(key)

  return values
def eval_step(self, pred_vals, succ_vals, reverse=False):
  """ Returns adjusted succ_vals.

  :param pred_vals: dictionary of predecessor values keyed by variable name.
  :param succ_vals: successor specification; a unit-set integer triggers
      transitional-CDF sampling, anything else is delegated to the parent.
  :param reverse: boolean flag (default False) to reverse direction.

  :return vals: ordered dict of predecessor and successor values.
  :return dims: dictionary of dimension indices.
  :return kwargs: keyword dictionary for downstream transitional evaluation.

  :raises ValueError: if CDF sampling is requested without a callable tfun.
  """
  if succ_vals is None:
    if self._delta is None:
      # All-scalar predecessors with no delta: request a single sample
      if all([isscalar(pred_value) for pred_value in pred_vals]):
        succ_vals = {0}

  # If not sampling succeeding values, use deterministic call
  if not isunitsetint(succ_vals):
    return super().eval_step(pred_vals, succ_vals, reverse=reverse)

  if self._tfun is not None and self._tfun.callable:
    succ_vals = self.eval_tfun(pred_vals)
  elif self._nvars == 1:
    # Single-variable case: delegate stepping to the variable itself if it
    # has a usable (non-callable) transition or a callable tfun
    var = self._varlist[0]
    tran = var.tran
    tfun = var.tfun
    if (tran is not None and not tran.callable) or \
        (tfun is not None and tfun.callable):
      vals, dims, kwargs = var.eval_step(pred_vals[var.name], succ_vals,
                                         reverse=reverse)
      return vals, dims, kwargs
    raise ValueError("Transitional CDF calling requires callable tfun")
  else:
    raise ValueError("Transitional CDF calling requires callable tfun")

  # Initialise outputs with predecessor values
  dims = {}
  kwargs = {'reverse': reverse}
  vals = collections.OrderedDict()
  for key in self._keylist:
    vals.update({key: pred_vals[key]})
  if succ_vals is None and self._tran is None:
    return vals, dims, kwargs

  # If stepping or have a transition function, add successor values
  for key in self._keylist:
    mod_key = key + "'"
    # Prefer a primed key in succ_vals if present, else reuse the plain key
    succ_key = key if mod_key not in succ_vals else mod_key
    vals.update({key + "'": succ_vals[succ_key]})
  return vals, dims, kwargs
def product(*args, **kwds):
  """ Multiplies two or more PDs subject to the following:
  1. They must not share the same marginal variables.
  2. Conditional variables must be identical unless contained as marginal from
  another distribution.

  :param args: two or more PD instances to multiply.
  :param kwds: optional 'pscale' keyword overriding the product pscale.

  :return: a PD instance representing the product distribution (or None if
      no arguments are given).
  """
  from probayes.pd import PD

  # Check pscales, scalars, possible fasttrack
  if not len(args):
    return None
  kwds = dict(kwds)
  pscales = [arg.pscale for arg in args]
  pscale = kwds.get('pscale', None) or prod_pscale(pscales)
  aresingleton = [arg.issingleton for arg in args]
  # Fast-track only considered when every input is singleton and all pscales
  # agree with the product pscale (linear 1. or log 0. representation)
  maybe_fasttrack = all(aresingleton) and \
                    np.all(pscale == np.array(pscales)) and \
                    pscale in [0, 1.]

  # Collate vals, probs, marg_names, and cond_names as lists
  vals = [collections.OrderedDict(arg) for arg in args]
  probs = [arg.prob for arg in args]
  marg_names = [list(arg.marg.values()) for arg in args]
  cond_names = [list(arg.cond.values()) for arg in args]

  # Detect uniqueness in marginal keys and identical conditionals
  all_marg_keys = []
  for arg in args:
    all_marg_keys.extend(list(arg.marg.keys()))
  marg_sets = None
  if len(all_marg_keys) != len(set(all_marg_keys)):
    # Shared marginals are only tolerated when every distribution has
    # identical marginal/conditional keys and a consistent pattern of
    # unit-set (to-be-sampled) marginal values
    marg_keys, cond_keys, marg_sets, = None, None, None
    for arg in args:
      if marg_keys is None:
        marg_keys = list(arg.marg.keys())
      elif marg_keys != list(arg.marg.keys()):
        marg_keys = None
        break
      if cond_keys is None:
        cond_keys = list(arg.cond.keys())
      elif cond_keys != list(arg.cond.keys()):
        marg_keys = None
        break
      if marg_keys:
        are_marg_sets = np.array([isunitsetint(arg[marg_key])
                                  for marg_key in marg_keys])
        if marg_sets is None:
          if np.any(are_marg_sets):
            marg_sets = are_marg_sets
          else:
            marg_keys = None
            break
        elif not np.all(marg_sets == are_marg_sets):
          marg_keys = None
          break
    # NOTE(review): this message reads as if a word is missing ("for
    # product currently not supported"?) — confirm intended wording.
    assert marg_keys is not None and marg_sets is not None, \
        "Non-unique marginal variables for currently not supported: {}".\
        format(all_marg_keys)
    maybe_fasttrack = True

  # Maybe fast-track identical conditionals
  if maybe_fasttrack:
    marg_same = True
    cond_same = True
    if marg_sets is None: # no need to recheck if not None (I think)
      marg_same = True
      for name in marg_names[1:]:
        if marg_names[0] != name:
          marg_same = False
          break
    # Conditionals are "same" if all empty, or all lists identical
    cond_same = not any(cond_names)
    if not cond_same:
      cond_same = True
      for name in cond_names[1:]:
        if cond_names[0] != name:
          cond_same = False
          break
    if marg_same and cond_same:
      marg_names = marg_names[0]
      cond_names = cond_names[0]
      prod_marg_name = ','.join(marg_names)
      prod_cond_name = ','.join(cond_names)
      prod_name = '|'.join([prod_marg_name, prod_cond_name])
      prod_vals = collections.OrderedDict()
      for i, val in enumerate(vals):
        areunitsetints = np.array([isunitsetint(_val)
                                   for _val in val.values()])
        if not np.any(areunitsetints):
          prod_vals.update(val)
        else:
          assert marg_sets is not None, "Variable mismatch"
          assert np.all(marg_sets == areunitsetints[:len(marg_sets)]), \
              "Variable mismatch"
          if not len(prod_vals):
            prod_vals.update(collections.OrderedDict(val))
          else:
            # Accumulate unit-set sample counts across distributions
            for j, key in enumerate(prod_vals.keys()):
              if areunitsetints[j]:
                prod_vals.update({key: {list(prod_vals[key])[0] + \
                                        list(val[key])[0]}})
      if marg_sets is not None:
        prob, pscale = prod_rule(*tuple(probs),
                                 pscales=pscales,
                                 pscale=pscale)
        return PD(prod_name, prod_vals, dims=args[0].dims,
                  prob=prob, pscale=pscale)
      else:
        # Log pscales (iscomplex) sum; linear pscales multiply
        prod_prob = float(sum(probs)) if iscomplex(pscale) else \
                    float(np.prod(probs))
        return PD(prod_name, prod_vals, prob=prod_prob, pscale=pscale)

  # Check cond->marg accounts for all differences between conditionals
  prod_marg = [name for dist_marg_names in marg_names \
               for name in dist_marg_names]
  prod_marg_name = ','.join(prod_marg)
  flat_cond_names = [name for dist_cond_names in cond_names \
                     for name in dist_cond_names]
  # Conditionals that appear as marginals elsewhere migrate to the product
  # marginals; the rest remain conditionals of the product
  cond2marg = [cond_name for cond_name in flat_cond_names \
               if cond_name in prod_marg]
  prod_cond = [cond_name for cond_name in flat_cond_names \
               if cond_name not in cond2marg]
  cond2marg_set = set(cond2marg)

  # Check conditionals compatible
  prod_cond_set = set(prod_cond)
  cond2marg_dict = {name: None for name in cond2marg}
  for i, arg in enumerate(args):
    cond_set = set(cond_names[i]) - cond2marg_set
    if cond_set:
      assert prod_cond_set == cond_set, \
          "Incompatible product conditional {} for conditional set {}: ".format(
              prod_cond_set, cond_set)
    # Values of migrated conditionals must agree across all distributions
    for name in cond2marg:
      if name in arg.keys():
        values = arg[name]
        if not isscalar(values):
          values = np.ravel(values)
        if cond2marg_dict[name] is None:
          cond2marg_dict[name] = values
        elif not np.allclose(cond2marg_dict[name], values):
          raise ValueError("Mismatch in values for condition {}".format(name))

  # Establish product name, values, and dimensions
  prod_keys = str2key(prod_marg + prod_cond)
  prod_nkeys = len(prod_keys)
  prod_aresingleton = np.zeros(prod_nkeys, dtype=bool)
  prod_areunitsetints = np.zeros(prod_nkeys, dtype=bool)
  prod_cond_name = ','.join(prod_cond)
  prod_name = prod_marg_name if not len(prod_cond_name) \
              else '|'.join([prod_marg_name, prod_cond_name])
  prod_vals = collections.OrderedDict()
  for i, key in enumerate(prod_keys):
    values = None
    for val in vals:
      if key in val.keys():
        values = val[key]
        prod_areunitsetints[i] = isunitsetint(val[key])
        # Unit-set values start from {0} and are summed in the pass below
        if prod_areunitsetints[i]:
          values = {0}
        break
    assert values is not None, "Values for key {} not found".format(key)
    prod_aresingleton[i] = issingleton(values)
    prod_vals.update({key: values})
  if np.any(prod_areunitsetints):
    for i, key in enumerate(prod_keys):
      if prod_areunitsetints[i]:
        for val in vals:
          if key in val:
            assert isunitsetint(val[key]), \
                "Mismatch in variables {} vs {}".\
                format(prod_vals, val)
            prod_vals.update({key: {list(prod_vals[key])[0] +
                                    list(val[key])[0]}})

  # Non-singleton keys each claim a new product dimension by default
  prod_newdims = np.array(np.logical_not(prod_aresingleton))
  dims_shared = False
  for arg in args:
    argdims = [dim for dim in arg.dims.values() if dim is not None]
    if len(argdims) != len(set(argdims)):
      dims_shared = True

  # Shared dimensions limit product dimensionality
  if dims_shared:
    seen_keys = set()
    for i, key in enumerate(prod_keys):
      if prod_newdims[i] and key not in seen_keys:
        for arg in args:
          if key in arg.dims:
            dim = arg.dims[key]
            seen_keys.add(key)
            # Any other key sharing this dimension does not add a new one
            for argkey, argdim in arg.dims.items():
              seen_keys.add(argkey)
              if argkey != key and argdim is not None:
                if dim == argdim:
                  index = prod_keys.index(argkey)
                  prod_newdims[index] = False

  prod_cdims = np.cumsum(prod_newdims)
  prod_ndims = prod_cdims[-1]

  # Fast-track scalar products
  if maybe_fasttrack and prod_ndims == 0:
    prob = float(sum(probs)) if iscomplex(pscale) else float(np.prod(probs))
    return PD(prod_name, prod_vals, prob=prob, pscale=pscale)

  # Reshape values - they require no axes swapping
  ones_ndims = np.ones(prod_ndims, dtype=int)
  prod_shape = np.ones(prod_ndims, dtype=int)
  scalarset = set()
  prod_dims = collections.OrderedDict()
  for i, key in enumerate(prod_keys):
    if prod_aresingleton[i]:
      scalarset.add(key)
    else:
      values = prod_vals[key]
      re_shape = np.copy(ones_ndims)
      dim = prod_cdims[i]-1
      prod_dims.update({key: dim})
      re_shape[dim] = values.size
      prod_shape[dim] = values.size
      prod_vals.update({key: values.reshape(re_shape)})

  # Match probability axes and shapes with axes swapping then reshaping
  for i in range(len(args)):
    prob = probs[i]
    if not isscalar(prob):
      # Map each distribution's own dimension index to the product index
      dims = collections.OrderedDict()
      for key, val in args[i].dims.items():
        if val is not None:
          dims.update({val: prod_dims[key]})
      old_dims = []
      new_dims = []
      for key, val in dims.items():
        if key not in old_dims:
          old_dims.append(key)
          new_dims.append(val)
      if len(old_dims) > 1 and not old_dims == new_dims:
        # Pad with trailing singleton axes before moving the axes
        max_dims_inc = max(new_dims) + 1
        while prob.ndim < max_dims_inc:
          prob = np.expand_dims(prob, -1)
        prob = np.moveaxis(prob, old_dims, new_dims)
      re_shape = np.copy(ones_ndims)
      for dim in new_dims:
        re_shape[dim] = prod_shape[dim]
      probs[i] = prob.reshape(re_shape)

  # Multiply the probabilities and output the result as a distribution instance
  prob, pscale = prod_rule(*tuple(probs), pscales=pscales, pscale=pscale)
  return PD(prod_name, prod_vals, dims=prod_dims, prob=prob, pscale=pscale)
def summate(*args):
  """ Quick and dirty concatenation of two or more PDs.

  All inputs must share the same pscale and identical marginal and
  conditional variable names.

  :param args: two or more PD instances to concatenate.

  :return: a PD instance of the concatenated distribution (or None if no
      arguments are given).
  """
  from probayes.pd import PD
  if not len(args):
    return None
  pscales = [arg.pscale for arg in args]
  vals = [dict(arg) for arg in args]
  probs = [arg.prob for arg in args]

  # Check pscales are the same
  pscale = pscales[0]
  for _pscale in pscales[1:]:
    assert pscale == _pscale, \
        "Cannot summate distributions with different pscales"

  # Check marginal and conditional keys
  # NOTE(review): the "{}" placeholders in these two messages are never
  # filled in (no .format call) — confirm and fix the message formatting.
  marg_keys = list(args[0].marg.keys())
  cond_keys = list(args[0].cond.keys())
  for arg in args[1:]:
    assert marg_keys == list(arg.marg.keys()), \
        "Marginal variable names not identical across distributions: {}"
    assert cond_keys == list(arg.cond.keys()), \
        "Conditional variable names not identical across distributions: {}"
  sum_keys = marg_keys + cond_keys
  sum_name = ','.join(marg_keys)
  if cond_keys:
    sum_name += '|' + ','.join(cond_keys)

  # If all singleton, concatenate in dimension 0
  if all([arg.issingleton for arg in args]):
    # For each key either every input supplies a unit set (counts summed)
    # or every input supplies a concrete value (values concatenated)
    unitsets = {key: isunitsetint(args[0][key]) for key in sum_keys}
    sum_dims = {key: None if unitsets[key] else 0 for key in sum_keys}
    sum_vals = {key: 0 if unitsets[key] else [] for key in sum_keys}
    sum_prob = []
    for arg in args:
      for key, val in arg.items():
        if unitsets[key]:
          assert isunitsetint(val), \
              "Cannot mix unspecified set and specified values"
          sum_vals[key] += list(val)[0]
        else:
          assert not isunitsetint(val), \
              "Cannot mix unspecified set and specified values"
          sum_vals[key].append(val)
      sum_prob.append(arg.prob)
    for key in sum_keys:
      if unitsets[key]:
        sum_vals[key] = {sum_vals[key]}
      else:
        sum_vals[key] = np.ravel(sum_vals[key])
    sum_prob = np.ravel(sum_prob)
    return PD(sum_name, sum_vals, dims=sum_dims,
              prob=sum_prob, pscale=pscale)

  # 2. all identical but in one dimension: concatenate in that dimension
  # TODO: fix the remaining code of this function below
  sum_vals = collections.OrderedDict(args[0])
  sum_dims = [None] * (len(args) - 1)
  for i, arg in enumerate(args):
    if i == 0:
      continue
    # Find, for each later distribution, the dimension along which its
    # values differ from the first distribution's
    for key in marg_keys:
      if sum_dims[i-1] is not None:
        continue
      elif not arg.singleton(key):
        key_vals = arg[key]
        if key_vals.size == sum_vals[key].size:
          if np.allclose(key_vals, sum_vals[key]):
            continue
        sum_dims[i-1] = arg.dims[key]
  # NOTE(review): "> 1" looks inverted for a uniqueness check (a single
  # shared axis would give a set of size 1) — part of the flagged TODO.
  assert len(set(sum_dims)) > 1, "Cannot find unique concatenation axis"
  sum_dim = sum_dims[0]
  sum_dims = args[0].dims
  key = marg_keys[sum_dim]
  sum_prob = np.copy(probs[0])
  for i, val in enumerate(vals):
    if i == 0:
      continue
    sum_vals[key] = np.concatenate([sum_vals[key], val[key]], axis=sum_dim)
    sum_prob = np.concatenate([sum_prob, probs[i]], axis=sum_dim)
  return PD(sum_name, sum_vals, dims=sum_dims, prob=sum_prob, pscale=pscale)
def eval_step(self, pred_vals, succ_vals, reverse=False):
  """ Evaluates a successive values from previous values with an optional
  direction reversal flag, outputting a three-length tuple that includes the
  successive values in the first argument.

  :param pred_vals: predecessor values (NumPy array).
  :param succ_vals: succecessor values (see step()).
  :param reverse: boolean flag (default False) to reverse direction.

  :return vals: a dictionary including both predecessor and successor values.
  :return dims: a dictionary with dimension indices for the values in vals.
  :return kwargs: a dictionary that includes optional keywords for eval_tran()
  """
  if succ_vals is None:
    assert self._tran is not None, "No transitional function specified"
  if isinstance(pred_vals, dict):
    pred_vals = pred_vals[self.name]
  kwargs = dict() # to pass over to eval_tran()
  if succ_vals is None:
    if self._delta is None:
      succ_vals = {0} if isscalar(pred_vals) else pred_vals
    else:
      delta = self.eval_delta()
      succ_vals = self.apply_delta(pred_vals, delta)

  #---------------------------------------------------------------------------
  def _reshape_vals(pred, succ):
    # Assigns dimension indices (successor first) and, when both are
    # non-singleton, reshapes pred along the inner axis and succ along the
    # outer axis so they broadcast into a 2-D grid.
    dims = {}
    ndim = 0

    # Now reshape the values according to succ > prev dimensionality
    if issingleton(succ):
      dims.update({self._name+"'": None})
    else:
      dims.update({self._name+"'": ndim})
      ndim += 1
    if issingleton(pred):
      dims.update({self._name: None})
    else:
      dims.update({self._name: ndim})
      ndim += 1

    if ndim == 2: # pred_vals distributed along inner dimension:
      pred = pred.reshape([1, pred.size])
      succ = succ.reshape([succ.size, 1])
    return pred, succ, dims
  #---------------------------------------------------------------------------

  # Scalar treatment is the most trivial and ignores reverse
  if self._tran is None or self._tran.isscalar:
    if isunitsetint(succ_vals):
      succ_vals = self.evaluate(succ_vals, use_pfun=False)[self._name]
    elif isunitsetfloat(succ_vals):
      assert self._vtype in VTYPES[float], \
          "Inverse CDF sampling for scalar probabilities unavailable for " + \
          "{} data type".format(self._vtype)
      # Interpret the unit-set float as a CDF position within the limits
      cdf_val = list(succ_vals)[0]
      lo, hi = min(self._limits), max(self._limits)
      # NOTE(review): this branch assigns succ_val (singular) but never
      # writes it back to succ_vals — looks like a latent bug; confirm.
      succ_val = lo*(1.-cdf_val) + hi*cdf_val
      if self._ufun is not None:
        succ_val = self.ufun[-1](succ_val)
    prob = self._tran() if self._tran is not None else None
    pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)

  # Handle discrete non-callables
  elif not self._tran.callable:
    if reverse and not self._tran.ismulti and not self.__sym_tran:
      warnings.warn("Reverse direction called from asymmetric transitional")
    # For multi transitionals, index 0/1 selects forward/reverse matrix
    prob = self._tran() if not self._tran.ismulti else \
           self._tran[int(reverse)]()
    if isunitset(succ_vals):
      succ_vals, pred_idx, succ_idx = matrix_cond_sample(pred_vals,
                                                         succ_vals,
                                                         prob=prob,
                                                         vset=self._vset)
      kwargs.update({'pred_idx': pred_idx, 'succ_idx': succ_idx})
    pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)

  # That just leaves callables
  else:
    kwds = {self._name: pred_vals}
    if isunitset(succ_vals):
      assert self._tfun is not None, \
          "Conditional sampling requires setting CDF and ICDF " + \
          "conditional functions using rv.set.tfun()"
      assert isscalar(pred_vals), \
          "Successor sampling only possible with scalar predecessors"
      succ_vals = list(succ_vals)[0]
      if type(succ_vals) in VTYPES[int] or type(succ_vals) in VTYPES[np.uint]:
        # Integer count: sample that many values uniformly in CDF space
        # between the conditional CDF evaluated at the variable limits
        lo, hi = min(self._ulims), max(self._ulims)
        kwds.update({self._name+"'": np.array([lo, hi], dtype=float)})
        lohi = self._tfun[0](**kwds)
        lo, hi = float(min(lohi)), float(max(lohi))
        succ_vals = uniform(lo, hi, succ_vals,
                            isinstance(self._vset[0], tuple),
                            isinstance(self._vset[1], tuple))
      else:
        # Non-integer: treat as CDF value(s) and invert via the ICDF
        succ_vals = np.atleast_1d(succ_vals)
        kwds.update({self._name: pred_vals,
                     self._name+"'": succ_vals})
        succ_vals = self._tfun[1](**kwds)
    elif not isscalar(succ_vals):
      succ_vals = np.atleast_1d(succ_vals)
    pred_vals, succ_vals, dims = _reshape_vals(pred_vals, succ_vals)

  vals = collections.OrderedDict({self._name+"'": succ_vals,
                                  self._name: pred_vals})
  kwargs.update({'reverse': reverse})
  return vals, dims, kwargs