def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Simulate targets for rowid after resolving its mixture component.

    The component is taken, in order of precedence, from
    self.rowid_to_component (when rowid is recorded there), from the value
    of self.indexer in constraints, or by sampling with log_pflip over
    self.cgpm_row_divide.support().

    Returns whatever self._simulate_one returns on the first two paths. On
    the sampling path returns the first element of the first batch when N
    is None, otherwise lchain applied to all batches.

    Raises ValueError if the constrained component is not in the support.
    """
    # A row already assigned to a component keeps that component.
    if rowid in self.rowid_to_component:
        assert not constraints or self.indexer not in constraints
        component = self.rowid_to_component[rowid]
        return self._simulate_one(
            rowid, targets, constraints, inputs, N, component)
    # A constraint on the indexer pins the component directly.
    if constraints and self.indexer in constraints:
        component = constraints[self.indexer]
        if component not in self.cgpm_row_divide.support():
            raise ValueError(
                'Constrained cluster has 0 density: %s' % (component, ))
        return self._simulate_one(
            rowid, targets, constraints, inputs, N, component)
    # Otherwise weight every component in the support by its logpdf.
    support = self.cgpm_row_divide.support()
    weights = []
    for candidate in support:
        weights.append(
            self.logpdf(rowid, {self.indexer: candidate}, constraints, inputs))
    num_draws = N or 1
    draws = log_pflip(weights, array=support, size=num_draws, rng=self.rng)
    # Tally how many draws landed on each component, dropping empty ones,
    # so that each chosen component is simulated from once.
    tallies = {}
    for component, tally in enumerate(np.bincount(draws)):
        if tally:
            tallies[component] = tally
    batches = []
    for component, tally in tallies.iteritems():
        batches.append(self._simulate_one(
            rowid, targets, constraints, inputs, tally, component))
    if N is None:
        return batches[0][0]
    return lchain(*batches)
def transition_hypers(cgpms, grids, rng):
    """Transitions hyperparameters of cgpms greedily.

    Hyperparameter names come from cgpms[0].get_hypers() and are visited in
    an order produced by rng.permutation. For each name, every value in
    grids[name] is scored by the sum of logpdf_score() over all cgpms, and
    the index returned by log_pflip selects the value to keep.

    Every cgpm is left holding the final hyperparameters, which are also
    returned.
    """
    assert all(isinstance(cgpm, DistributionCGPM) for cgpm in cgpms)
    assert all(type(cgpm) is type(cgpms[0]) for cgpm in cgpms)
    current = cgpms[0].get_hypers()
    visit_order = rng.permutation(current.keys())
    for name in visit_order:
        grid = grids[name]
        scores = []
        for candidate in grid:
            # Score this candidate with all other hypers held fixed.
            current[name] = candidate
            total = 0
            for cgpm in cgpms:
                cgpm.set_hypers(current)
                total += cgpm.logpdf_score()
            scores.append(total)
        # Commit the sampled grid point before moving to the next hyper.
        chosen = log_pflip(scores, rng=rng)
        current[name] = grid[chosen]
    # The scoring loop leaves the cgpms at the last candidate tried, so
    # push the selected values back into each of them.
    for cgpm in cgpms:
        cgpm.set_hypers(current)
    return current
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return {self.outputs[0]: x} for rowid.

    x is the stored self.data[rowid] when it exists and is not NaN.
    Otherwise x is the result of log_pflip over the predictive log
    probabilities of 0 and 1. N is only forwarded to
    DistributionCGPM.simulate; a single dict is returned either way.
    """
    DistributionCGPM.simulate(self, rowid, targets, constraints, inputs, N)
    has_value = rowid in self.data and not isnan(self.data[rowid])
    if has_value:
        value = self.data[rowid]
    else:
        # Predictive log probabilities for outcomes 0 and 1, in that order.
        logps = [
            calc_predictive_logp(
                outcome, self.N, self.x_sum, self.alpha, self.beta)
            for outcome in (0, 1)
        ]
        value = log_pflip(logps, rng=self.rng)
    return {self.outputs[0]: value}
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return {self.outputs[0]: x} for rowid.

    x is self.data[rowid] when rowid is in self.data. Otherwise each value
    in self.support() is scored with self.logpdf and log_pflip picks one.
    N is only forwarded to DistributionCGPM.simulate.
    """
    DistributionCGPM.simulate(self, rowid, targets, constraints, inputs, N)
    if rowid in self.data:
        return {self.outputs[0]: self.data[rowid]}
    candidates = self.support()
    logps = []
    for candidate in candidates:
        logps.append(self.logpdf(rowid, {targets[0]: candidate}, None))
    drawn = log_pflip(logps, array=candidates, rng=self.rng)
    return {self.outputs[0]: drawn}
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return {self.outputs[0]: x} for rowid.

    x is self.data[rowid] when rowid is in self.data. Otherwise x is the
    result of gu.log_pflip over the predictive log probabilities of 0 and
    1. N is only forwarded to DistributionGpm.simulate.
    """
    DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
    if rowid not in self.data:
        # Predictive log probabilities for outcomes 0 and 1, in that order.
        logps = [
            Bernoulli.calc_predictive_logp(
                outcome, self.N, self.x_sum, self.alpha, self.beta)
            for outcome in (0, 1)
        ]
        value = gu.log_pflip(logps, rng=self.rng)
    else:
        value = self.data[rowid]
    return {self.outputs[0]: value}
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return {self.outputs[0]: x} for rowid.

    x is self.data[rowid] when rowid is in self.data. Otherwise the
    candidates are the sorted keys of self.counts plus one more than the
    largest key (or just [0] when self.counts is empty). Each candidate is
    scored with self.logpdf and gu.log_pflip picks one.
    N is only forwarded to DistributionGpm.simulate.
    """
    DistributionGpm.simulate(self, rowid, targets, constraints, inputs, N)
    if rowid in self.data:
        return {self.outputs[0]: self.data[rowid]}
    if self.counts:
        # Existing entries, followed by one not-yet-used value.
        candidates = sorted(self.counts)
        candidates.append(max(self.counts) + 1)
    else:
        candidates = [0]
    logps = []
    for candidate in candidates:
        logps.append(self.logpdf(rowid, {targets[0]: candidate}, None))
    drawn = gu.log_pflip(logps, array=candidates, rng=self.rng)
    return {self.outputs[0]: drawn}
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return {self.outputs[0]: x} for rowid.

    Requires targets == self.outputs and empty constraints (asserted).
    x is self.data.x[rowid] when rowid is recorded there. Otherwise each
    value in xrange(self.k) is scored with self.logpdf given inputs and x
    is the result of gu.log_pflip over those scores. N is not used.
    """
    assert targets == self.outputs
    assert not constraints
    if rowid not in self.data.x:
        logps = []
        for category in xrange(self.k):
            logps.append(
                self.logpdf(rowid, {targets[0]: category}, None, inputs))
        value = gu.log_pflip(logps, rng=self.rng)
    else:
        value = self.data.x[rowid]
    return {self.outputs[0]: value}
def transition_rows(cgpm_mixture, rowid, rng):
    """Performs a Gibbs step on the rowid in the given cgpm_mixture.

    The row is unobserved, each assignment in the row divider's support is
    scored with cgpm_mixture.logpdf on the row's observation, one
    assignment is chosen with log_pflip, and the row is observed again
    under that assignment.
    """
    assert isinstance(cgpm_mixture, (FiniteRowMixture, FlexibleRowMixture))
    observation, inputs = cgpm_mixture.unobserve(rowid)
    divider = cgpm_mixture.cgpm_row_divide
    candidates = divider.support()
    scores = []
    for candidate in candidates:
        # Overwrite the assignment entry in place and score the full row.
        observation[divider.outputs[0]] = candidate
        scores.append(cgpm_mixture.logpdf(None, observation, None, inputs))
    chosen = log_pflip(scores, array=candidates, rng=rng)
    observation[divider.outputs[0]] = chosen
    cgpm_mixture.observe(rowid, observation, inputs)
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return one sample of targets chosen by importance resampling.

    Draws self.accuracy (sample, weight) pairs from self.weighted_sample
    and selects one sample with log_pflip over the weights. The selection
    is skipped when there is only a single pair. N is not used; the result
    is always a single dict keyed by targets.

    Raises ValueError when every weight is infinite.
    """
    if not constraints:
        constraints = {}
    if not inputs:
        inputs = {}
    # Generate samples and weights.
    draws = [
        self.weighted_sample(rowid, targets, constraints, inputs)
        for _ in xrange(self.accuracy)
    ]
    samples, weights = zip(*draws)
    if all(isinf(weight) for weight in weights):
        raise ValueError('Zero density constraints: %s' % (constraints, ))
    # Sample importance resample.
    if self.accuracy == 1:
        index = 0
    else:
        index = log_pflip(weights, rng=self.rng)
    picked = samples[index]
    return {q: picked[q] for q in targets}
def _gibbs_transition_row(self, rowid):
    """Resample the cluster of rowid and migrate the row if it changed.

    The candidate tables and their log probabilities come from
    self.crp.clusters[0]. These are added elementwise to the per-table row
    data log probabilities and gu.log_pflip draws the new table.
    self._check_partitions() is called afterwards.
    """
    crp = self.crp.clusters[0]
    # Candidate tables and the crp log probability of joining each one.
    tables = crp.gibbs_tables(rowid)
    logp_crp = crp.gibbs_logps(rowid)
    # Log probability of the row's data under each candidate table.
    logp_data = self._logpdf_row_gibbs(rowid, tables)
    assert len(logp_data) == len(logp_crp)
    # Combine both terms and draw the destination table.
    logp_total = np.add(logp_data, logp_crp)
    destination = gu.log_pflip(logp_total, array=tables, rng=self.rng)
    # Only move the row when the draw differs from its current table.
    if self.Zr(rowid) != destination:
        self._migrate_row(rowid, destination)
    self._check_partitions()
def transition_params(self, N=None):
    """Run N transitions (one when N is None) of alpha and the regressor.

    Each transition scores 30 evenly spaced alpha values in [0.01, 0.99]
    with RandomForest.calc_log_likelihood, sets self.alpha to the value
    chosen by gu.log_pflip, and then refits self.regressor when self.data.Y
    is non-empty.
    """
    sweeps = 1 if N is None else N
    for _ in xrange(sweeps):
        # Transition noise parameter.
        grid = np.linspace(0.01, 0.99, 30)
        scores = []
        for candidate in grid:
            scores.append(RandomForest.calc_log_likelihood(
                self.data.x.values(), self.data.Y.values(),
                self.regressor, self.counts, candidate))
        self.alpha = gu.log_pflip(scores, array=grid, rng=self.rng)
        # Transition forest.
        if len(self.data.Y) > 0:
            self.regressor.fit(self.data.Y.values(), self.data.x.values())
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Return one sample of targets chosen by importance resampling.

    Draws self.accuracy (sample, weight) pairs from self.weighted_sample
    and selects one sample with gu.log_pflip over the weights. N is not
    used; the result is always a single dict keyed by targets.

    Raises ValueError when every weight is infinite.
    """
    constraints = {} if constraints is None else constraints
    inputs = {} if inputs is None else inputs
    draws = []
    for _ in xrange(self.accuracy):
        draws.append(
            self.weighted_sample(rowid, targets, constraints, inputs))
    samples, weights = zip(*draws)
    if all(isinf(weight) for weight in weights):
        raise ValueError('Zero density constraints: %s' % (constraints, ))
    # Skip an expensive random choice if there is only one option.
    if self.accuracy == 1:
        index = 0
    else:
        index = gu.log_pflip(weights, rng=self.rng)
    picked = samples[index]
    return {q: picked[q] for q in targets}
def view_simulate(view, rowid, targets, constraints, N):
    """Simulate N samples of targets for rowid from the given view.

    For a non-hypothetical row, returns _simulate_row for the row's current
    cluster. For a hypothetical row, each candidate cluster is weighted by
    its Crp predictive log probability plus the log probability of the
    constraints in that cluster. N cluster draws are taken with log_pflip
    and the result is a lazy chain over the per-cluster _simulate_row
    results.

    Raises ValueError when the constraints have -inf/inf log probability in
    every candidate cluster.
    """
    if not view.hypothetical(rowid):
        return _simulate_row(view, targets, view.Zr(rowid), N)
    cluster_sizes = view.Nk()
    num_rows = len(view.Zr())
    tables = view.crp.clusters[0].gibbs_tables(-1)
    lp_crp = []
    for table in tables:
        lp_crp.append(Crp.calc_predictive_logp(
            table, num_rows, cluster_sizes, view.alpha()))
    lp_constraints = []
    for table in tables:
        lp_constraints.append(_logpdf_row(view, constraints, table))
    if all(np.isinf(lp_constraints)):
        raise ValueError('Zero density constraints: %s' % (constraints, ))
    lp_cluster = np.add(lp_crp, lp_constraints)
    draws = log_pflip(lp_cluster, array=tables, size=N, rng=view.rng)
    # Number of samples requested from each cluster that was drawn at all.
    tallies = {}
    for table, tally in enumerate(np.bincount(draws)):
        if tally > 0:
            tallies[table] = tally
    # Kept as a generator so the per-cluster simulation runs only when the
    # returned chain is consumed.
    batches = (
        _simulate_row(view, targets, table, tallies[table])
        for table in tallies
    )
    return chain.from_iterable(batches)
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Simulate N values (one when N is None) of the single output.

    Requires targets == self.outputs and empty constraints (asserted).
    Values come from, in order of precedence: the stored self.data[rowid]
    repeated; a draw by self.rng.choice over range(self.k) when
    self.regressor is falsy; or log_pflip over the regressor's predicted
    log probabilities for the processed inputs.

    Returns dictify_samples(self.outputs[0], samples, N).
    """
    assert targets == self.outputs
    assert not constraints
    num_draws = N or 1
    if rowid in self.data:
        samples = [self.data[rowid]] * num_draws
    elif not self.regressor:
        samples = self.rng.choice(range(self.k), size=num_draws)
    else:
        # The regressor is queried with a single-row, 2-d probe.
        probe = np.reshape(self.process_inputs(inputs), (1, -1))
        logps = self.regressor.predict_log_proba(probe)
        # NOTE(review): assumes class_to_index.keys() is ordered like the
        # columns of predict_log_proba -- confirm where it is built.
        samples = log_pflip(
            logps[0], array=self.class_to_index.keys(),
            size=num_draws, rng=self.rng)
    return dictify_samples(self.outputs[0], samples, N)
def _likelihood_weighted_resample(self, samples, rowid, constraints=None,
        inputs=None, statenos=None, multiprocess=1):
    """Resample per-state samples according to per-state weights.

    samples holds one list per state, all of the same length N. The
    weights are zero for every state when constraints is empty, otherwise
    the result of self.logpdf for the constraints. N state draws are taken
    with gu.log_pflip; each state then contributes as many of its own
    samples, chosen without replacement, as it received draws.

    Returns a flat list of the selected samples.

    Raises AssertionError if samples does not hold one list per state
    (len(statenos) when statenos is given, else len(self.states)), or if
    the per-state lists differ in length.
    """
    # Compute the expected count first. Written inline as
    # `len(samples) == a if cond else b`, the comparison binds tighter than
    # the conditional, so a supplied statenos was never compared against
    # len(samples).
    expected = len(self.states) if statenos is None else len(statenos)
    assert len(samples) == expected
    assert all(len(s) == len(samples[0]) for s in samples[1:])
    N = len(samples[0])
    if not constraints:
        weights = np.zeros(len(samples))
    else:
        weights = self.logpdf(
            rowid, constraints, inputs, statenos=statenos,
            multiprocess=multiprocess)
    # n_model[i] is the number of draws that landed on state i. bincount
    # stops at the largest drawn index, and zip below stops with it.
    n_model = np.bincount(gu.log_pflip(weights, size=N, rng=self.rng))
    indexes = [self.rng.choice(N, size=n, replace=False) for n in n_model]
    resamples = [
        [s[i] for i in index]
        for s, index in zip(samples, indexes)
        if len(index) > 0
    ]
    return list(itertools.chain.from_iterable(resamples))
def transition_hypers_full(cgpms, grids, rng):
    """Transitions hyperparameters of cgpms using full grid search.

    Every cell in the Cartesian product of the grid values is scored by the
    sum of logpdf_score() over all cgpms after setting that cell's
    hyperparameters. log_pflip picks the index of one cell, which is then
    set on every cgpm.

    Returns the selected hyperparameters, the list of cells, and the list
    of cell scores.
    """
    assert all(isinstance(cgpm, DistributionCGPM) for cgpm in cgpms)
    assert all(type(cgpm) is type(cgpms[0]) for cgpm in cgpms)

    def apply_to_all(assignment):
        # Set the assignment on every cgpm and return the summed score.
        total = 0
        for cgpm in cgpms:
            cgpm.set_hypers(assignment)
            total += cgpm.logpdf_score()
        return total

    names = grids.keys()
    cells = list(itertools.product(*grids.itervalues()))
    logps = []
    for cell in cells:
        logps.append(apply_to_all(dict(zip(names, cell))))
    index = log_pflip(logps, rng=rng)
    selected = dict(zip(names, cells[index]))
    # Scoring leaves the cgpms at the last cell; restore the chosen one.
    for cgpm in cgpms:
        cgpm.set_hypers(selected)
    return selected, cells, logps
def simulate(self, rowid, targets, constraints=None, inputs=None, N=None):
    """Simulate targets for rowid through the network from build_network.

    self.outputs[0] is the cluster assignment variable. When it appears in
    the constraints, the call is delegated directly to network.simulate.
    Otherwise clusters are weighted by network.logpdf of the constraints
    joined with each candidate assignment, N assignments (one when N is
    None) are drawn with gu.log_pflip, and the remaining targets are
    simulated per drawn cluster. If the assignment variable was among the
    targets it is merged back into every sample.

    Returns a single sample when N is None, otherwise a list of samples.
    """
    # Refer to comment in logpdf.
    constraints = self._populate_constraints(rowid, targets, constraints)
    if not self.hypothetical(rowid):
        rowid = None
    network = self.build_network()
    cluster_var = self.outputs[0]
    # Condition on the cluster assignment.
    if cluster_var in constraints:
        return network.simulate(rowid, targets, constraints, inputs, N)
    # Determine how many samples to return.
    single = N is None
    num_samples = 1 if single else N
    # Expose cluster assignments to the samples?
    exposed = cluster_var in targets
    if exposed:
        targets = [q for q in targets if q != cluster_var]
    # Weight clusters by probability of constraints in each cluster.
    tables = self.crp.clusters[0].gibbs_tables(-1)
    joint_constraints = [
        merged(constraints, {cluster_var: table}) for table in tables
    ]
    weights = [network.logpdf(rowid, joint) for joint in joint_constraints]
    # Find number of samples in each cluster.
    draws = gu.log_pflip(
        weights, array=tables, size=num_samples, rng=self.rng)
    tallies = {}
    for table, tally in enumerate(np.bincount(draws)):
        if tally > 0:
            tallies[table] = tally
    # Add the cluster assignment to the constraints and sample the rest.
    conditioned = {}
    for table in tallies:
        conditioned[table] = merged(constraints, {cluster_var: table})
    batches = []
    for table in tallies:
        batches.append(network.simulate(
            rowid, targets, conditioned[table], inputs, tallies[table]))
    # If cluster assignments are exposed, append them to the samples.
    if exposed:
        batches = [
            [merged(sample, {cluster_var: table}) for sample in batch]
            for batch, table in zip(batches, tallies)
        ]
    # Return 1 sample if N is None, otherwise a list.
    flat = []
    for batch in batches:
        flat.extend(batch)
    if single:
        return flat[0]
    return flat
def transition_hypers(self):
    """Transitions the hyperparameters of each cluster.

    The keys of self.hypers are shuffled with self.rng and visited in that
    order. For each key, every value in self.hyper_grids[key] is scored by
    the sum of logpdf_score() over all clusters, and the index returned by
    gu.log_pflip selects the value stored back into self.hypers.

    Afterwards every cluster is given the final hyperparameters and
    self.aux_model is rebuilt with self.create_aux_model().
    """
    names = self.hypers.keys()
    self.rng.shuffle(names)
    for name in names:
        grid = self.hyper_grids[name]
        logps = []
        for candidate in grid:
            # Score this candidate with all other hypers held fixed.
            self.hypers[name] = candidate
            total = 0
            for k in self.clusters:
                cluster = self.clusters[k]
                cluster.set_hypers(self.hypers)
                total += cluster.logpdf_score()
            logps.append(total)
        # Sample a new hyperparameter from the grid.
        chosen = gu.log_pflip(logps, rng=self.rng)
        self.hypers[name] = grid[chosen]
    # Scoring leaves the clusters at the last candidate tried, so push the
    # selected values back into each of them.
    for k in self.clusters:
        self.clusters[k].set_hypers(self.hypers)
    self.aux_model = self.create_aux_model()