def _joint_logpdf(self, bdb, genid, modelno, Q, Y, n_samples=None): # XXX Computes the joint probability of query Q given evidence Y # for a single model. The function is a likelihood weighted # integrator. # XXX Determine. if n_samples is None: n_samples = self.n_samples # Validate inputs. if modelno is None: raise ValueError('Invalid modelno None, integer requried.') if len(Q) == 0: raise ValueError('Invalid query Q: len(Q) == 0.') # Ensure consistency of any duplicates in Q and Y. Q = self._queries_consistent_with_constraints(Q, Y) if Q is None: return float('-inf') for r, _, _ in Q+Y: assert r == Q[0][0], "Cannot assess more than one row, "\ "%s and %s requested" % (Q[0][0], r) # (Q,Y) marginal joint density. _, QY_weights = self._weighted_sample(bdb, genid, modelno, Q[0][0], Q+Y, n_samples=n_samples) # Y marginal density. _, Y_weights = self._weighted_sample(bdb, genid, modelno, Q[0][0], Y, n_samples=n_samples) # XXX TODO Keep sampling until logpQY <= logpY logpQY = logmeanexp(QY_weights) logpY = logmeanexp(Y_weights) return logpQY - logpY
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
        rowid, colno):
    """Predictive probability of one cell given the rest of its row.

    Returns None when the cell at (rowid, colno) is None.  Otherwise the
    cell is used as the query and every other non-None cell of the row
    as a constraint; each generator's logpdf_joint is evaluated at a
    fresh row id, and the per-generator results are combined with
    logmeanexp and passed through ieee_exp.
    """
    value = core.bayesdb_population_cell_value(
        bdb, population_id, rowid, colno)
    if value is None:
        return None
    # Retrieve all other values in the row.
    row_values = core.bayesdb_population_row_values(bdb, population_id, rowid)
    variable_numbers = core.bayesdb_variable_numbers(bdb, population_id, None)
    # Build the constraints and query from rowid, using a fresh rowid.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)
    query = [(colno, value)]
    # Distinct loop names so the comprehension does not rebind `value`.
    constraints = [
        (c, v)
        for (c, v) in zip(variable_numbers, row_values)
        if v is not None and c != colno
    ]
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    predprobs = [
        core.bayesdb_generator_metamodel(bdb, gid).logpdf_joint(
            bdb, gid, fresh_rowid, query, constraints, None)
        for gid in generator_ids
    ]
    return ieee_exp(logmeanexp(predprobs))
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
        modelnos, rowid, targets, constraints):
    """Predictive probability of target cells given constraint cells.

    `targets` and `constraints` arrive as JSON text and are decoded to
    sequences of column numbers.  The cell values are read from `rowid`;
    None-valued cells are dropped.  Returns None when no target cell has
    a value.  Otherwise each generator's metamodel evaluates
    logpdf_joint at a fresh row id, and the per-generator results are
    combined with logmeanexp and passed through ieee_exp.
    """
    target_colnos = json.loads(targets)
    constraint_colnos = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the constraints and query from rowid, using a fresh rowid.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)

    def retrieve_values(colnos):
        # Pair each column number with its cell value, skipping NULLs.
        pairs = []
        for colno in colnos:
            cell = core.bayesdb_population_cell_value(
                bdb, population_id, rowid, colno)
            if cell is not None:
                pairs.append((colno, cell))
        return pairs

    cgpm_targets = retrieve_values(target_colnos)
    # If all targets have NULL values, return None.
    if len(cgpm_targets) == 0:
        return None
    cgpm_constraints = retrieve_values(constraint_colnos)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    predprobs = []
    for gid in generator_ids:
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        predprobs.append(metamodel.logpdf_joint(
            bdb, gid, modelnos, fresh_rowid, cgpm_targets, cgpm_constraints))
    return ieee_exp(logmeanexp(predprobs))
def logpdf_joint(
        self, bdb, generator_id, modelnos, rowid, targets, _constraints,
        ):
    """Log density of `targets`, aggregated over every stored model.

    `targets` is an iterable of (colno, x) pairs.  For each model the
    per-target values from self._logpdf_1 are summed, and the per-model
    sums are combined with logmeanexp.

    `modelnos`, `rowid` and `_constraints` are accepted but not used.
    """
    # Note: The constraints are irrelevant for the same reason as
    # in simulate_joint.
    (all_mus, all_sigmas) = self._all_mus_sigmas(bdb, generator_id)

    def model_log_pdf(modelno):
        mus = all_mus[modelno]
        sigmas = all_sigmas[modelno]
        # Unpack each (colno, x) pair in the loop rather than in a
        # tuple parameter, which is a syntax error on Python 3.
        return sum(
            self._logpdf_1(bdb, generator_id, mus, sigmas, colno, x)
            for (colno, x) in targets)

    # XXX Ignore modelnos and aggregate over all of them.
    modelwise = [model_log_pdf(m) for m in sorted(all_mus.keys())]
    return logmeanexp(modelwise)
def bql_row_column_predictive_probability(
        bdb, population_id, generator_id, modelnos, rowid, targets,
        constraints):
    """Predictive probability of target cells given constraint cells.

    `targets` and `constraints` are JSON text decoding to sequences of
    column numbers.  Values are looked up in `rowid`, and columns whose
    cell is None are omitted.  If every target cell is None the result
    is None.  Otherwise each generator's backend evaluates logpdf_joint
    at a fresh row id; the results are combined with logmeanexp and
    passed through ieee_exp.
    """
    targets = json.loads(targets)
    constraints = json.loads(constraints)
    modelnos = _retrieve_modelnos(modelnos)
    # Build the constraints and query from rowid, using a fresh rowid.
    fresh_rowid = core.bayesdb_population_fresh_row_id(bdb, population_id)

    def retrieve_values(colnos):
        # Fetch every cell first, then keep only the non-NULL ones.
        cells = [
            (colno, core.bayesdb_population_cell_value(
                bdb, population_id, rowid, colno))
            for colno in colnos
        ]
        return [(colno, cell) for (colno, cell) in cells if cell is not None]

    cgpm_targets = retrieve_values(targets)
    # If all targets have NULL values, return None.
    if len(cgpm_targets) == 0:
        return None
    cgpm_constraints = retrieve_values(constraints)
    generator_ids = _retrieve_generator_ids(bdb, population_id, generator_id)
    predprobs = [
        core.bayesdb_generator_backend(bdb, gid).logpdf_joint(
            bdb, gid, modelnos, fresh_rowid, cgpm_targets, cgpm_constraints)
        for gid in generator_ids
    ]
    log_predprob = logmeanexp(predprobs)
    return ieee_exp(log_predprob)
def logpdf_joint(self, bdb, generator_id, targets, constraints, modelno):
    """Log density of `targets` given `constraints`.

    When `modelno` is None, every model number reported by
    core.bayesdb_generator_modelnos is used; otherwise only `modelno`.
    The per-model values from self._joint_logpdf are computed inside a
    bdb savepoint and combined with logmeanexp.
    """
    modelnos = (
        core.bayesdb_generator_modelnos(bdb, generator_id)
        if modelno is None
        else [modelno]
    )
    with bdb.savepoint():
        per_model = [
            self._joint_logpdf(bdb, generator_id, m, targets, constraints)
            for m in modelnos
        ]
        return logmeanexp(per_model)
def logpdf_joint(self, bdb, generator_id, targets, _constraints,
        modelno=None):
    """Log density of `targets`, aggregated over every stored model.

    `targets` is an iterable of 3-tuples whose second element is the
    column number and third is the value; the first element is ignored.
    For each model, logpdf_gaussian is evaluated per target with that
    model's mu and sigma for the column, the values are summed, and the
    per-model sums are combined with logmeanexp.

    `_constraints` and `modelno` are accepted but not used.
    """
    # Note: The constraints are irrelevant for the same reason as
    # in simulate_joint.
    (all_mus, all_sigmas) = self._all_mus_sigmas(bdb, generator_id)
    modelwise = []
    for m in sorted(all_mus.keys()):
        total = 0
        for (_, colno, value) in targets:
            total += logpdf_gaussian(
                value, all_mus[m][colno], all_sigmas[m][colno])
        modelwise.append(total)
    return logmeanexp(modelwise)
def logpdf_joint(self, bdb, generator_id, modelnos, rowid, targets,
        _constraints,):
    """Log density of `targets`, aggregated over every stored model.

    `targets` is an iterable of (colno, x) pairs.  Each model
    contributes the sum of self._logpdf_1 over the targets, and the
    per-model sums are combined with logmeanexp.

    `modelnos`, `rowid` and `_constraints` are accepted but not used.
    """
    # Note: The constraints are irrelevant for the same reason as
    # in simulate_joint.
    (all_mus, all_sigmas) = self._all_mus_sigmas(bdb, generator_id)

    def model_log_pdf(modelno):
        mus = all_mus[modelno]
        sigmas = all_sigmas[modelno]

        def logpdf_1(target):
            # Unpack in the body: tuple parameters such as
            # `def f((colno, x))` are a syntax error on Python 3.
            (colno, x) = target
            return self._logpdf_1(bdb, generator_id, mus, sigmas, colno, x)

        return sum(logpdf_1(target) for target in targets)

    # XXX Ignore modelnos and aggregate over all of them.
    modelwise = [model_log_pdf(m) for m in sorted(all_mus.keys())]
    return logmeanexp(modelwise)
def bql_row_column_predictive_probability(bdb, population_id, generator_id,
        rowid, colno):
    """Predictive probability of the cell at (rowid, colno).

    Returns None when the cell value is None.  Otherwise the single
    target (rowid, colno, value) is scored, with no constraints, by the
    metamodel of `generator_id` or, when that is None, of every
    generator of the population; the results are combined with
    logmeanexp and passed through ieee_exp.
    """
    value = core.bayesdb_population_cell_value(
        bdb, population_id, rowid, colno)
    if value is None:
        return None
    if generator_id is not None:
        generator_ids = [generator_id]
    else:
        generator_ids = core.bayesdb_population_generators(
            bdb, population_id)
    predprobs = []
    for gid in generator_ids:
        metamodel = core.bayesdb_generator_metamodel(bdb, gid)
        # Fresh argument lists per call, as in a per-generator helper.
        predprobs.append(metamodel.logpdf_joint(
            bdb, gid, [(rowid, colno, value)], [], None))
    return ieee_exp(logmeanexp(predprobs))
def test_logmeanexp():
    """Check logmeanexp against directly computed reference values."""
    # Identical inputs: the result must be exactly that input.
    assert logmeanexp([-1000., -1000.]) == -1000.
    # Two moderate inputs: compare with log of the arithmetic mean.
    expected_near = math.log(0.5 * (1 + math.exp(-1.)))
    assert relerr(expected_near, logmeanexp([0., -1.])) < 1e-15
    # One input far below the other: the result should be log(1/2).
    expected_far = math.log(0.5)
    assert relerr(expected_far, logmeanexp([0., -1000.])) < 1e-15