def test_single_categorical_col_samples(model):
    """Sanity-check sampling from a single categorical column.

    Draws many samples from column 0 of ``model`` and verifies that the
    empirical category frequencies come out in the expected order:
    category 0 most frequent, then category 2, then category 1.
    """
    n_draws = 1000
    draws = mu.sample([model], [0], n=n_draws).flatten()
    counts = np.bincount([int(v) for v in draws], minlength=3)
    # Expected frequency ordering: counts[0] > counts[2] > counts[1].
    assert counts[2] > counts[1]
    assert counts[0] > counts[2]
def sample(self, cols, given=None, n=1): """ Draw samples from cols Parameters ---------- cols : list(index) List of columns from which to jointly draw. given : list(tuple(int, value)) List of column-value tuples that specify conditions n : int The number of samples to draw Examples -------- Draw whether an animal is fast and agile given that it is bulbous. >>> engine = Engine.load('examples/zoo.bcmodels') >>> engine.sample(['fast', 'agile'], given=[('bulbous': 1])) """ # FIXME: make sure that given does not contain columns from cols if given is not None: given = du.convert_given(given, self._dtypes, self._converters) col_idxs = [self._converters['col2idx'][col] for col in cols] data_out = mu.sample(self._models, col_idxs, given=given, n=n) x = du.convert_data(data_out, cols, self._dtypes, self._converters, to_val=True) if x.shape == ( 1, 1, ): return x[0, 0] elif x.shape[0] == 1: return x[0, :] else: return x
def entropy(self, col, n_samples=500):
    """ The entropy of a column.

    Notes
    -----
    Returns differential entropy for continuous feature (obviously).

    Parameters
    ----------
    col : indexer
        The name of the column
    n_samples : int
        The number of samples to use for the Monte Carlo approximation.
        Only used when `col` is NOT categorical; categorical entropy is
        computed exactly by enumerating the support.

    Returns
    -------
    h : float
        The entropy of `col`.
    """
    col_idx = self._converters['col2idx'][col]
    dtype = self._dtypes[col_idx]

    # Unless x is enumerable (is categorical), we approximate h(x) with
    # a Monte Carlo estimate: draw x ~ p(x) and average -log p(x).
    if dtype == 'categorical':
        # Exact entropy: sum -p*log(p) over the k categories.
        k = self._distargs[col_idx][0]
        x = np.array([[i] for i in range(k)])
        logps = mu.probability(x, self._models, (col_idx, ))
        assert logps.shape == (k, )

        h = -np.sum(np.exp(logps) * logps)
    else:
        x = mu.sample(self._models, (col_idx, ), n=n_samples)
        logps = mu.probability(x, self._models, (col_idx, ))

        h = -np.sum(logps) / n_samples

    return h
def entropy(self, col, n_samples=500):
    """ The entropy of a column.

    Notes
    -----
    Returns differential entropy for continuous feature (obviously).

    Parameters
    ----------
    col : indexer
        The name of the column
    n_samples : int
        The number of samples to use for the Monte Carlo approximation
        (used only when `col` is not categorical).

    Returns
    -------
    h : float
        The entropy of `col`.
    """
    col_idx = self._converters['col2idx'][col]

    if self._dtypes[col_idx] == 'categorical':
        # Categorical support is enumerable, so compute the entropy
        # exactly: h = -sum_i p_i * log(p_i) over the k categories.
        n_vals = self._distargs[col_idx][0]
        support = np.array([[v] for v in range(n_vals)])
        logps = mu.probability(support, self._models, (col_idx,))
        assert logps.shape == (n_vals,)
        return -np.sum(np.exp(logps)*logps)

    # Continuous column: Monte Carlo estimate of h(x). Draw x ~ p(x)
    # and average -log p(x) over the draws.
    draws = mu.sample(self._models, (col_idx,), n=n_samples)
    logps = mu.probability(draws, self._models, (col_idx,))
    return -np.sum(logps) / n_samples
def sample(self, cols, given=None, n=1):
    """ Draw samples from cols

    Parameters
    ----------
    cols : list(index)
        List of columns from which to jointly draw.
    given : list(tuple(int, value))
        List of column-value tuples that specify conditions
    n : int
        The number of samples to draw

    Examples
    --------
    Draw whether an animal is fast and agile given that it is bulbous.

    >>> engine = Engine.load('examples/zoo.bcmodels')
    >>> engine.sample(['fast', 'agile'], given=[('bulbous', 1)])
    """
    # FIXME: make sure that given does not contain columns from cols
    if given is not None:
        # Convert user-facing (column, value) conditions into the
        # internal representation.
        given = du.convert_given(given, self._dtypes, self._converters)

    idxs = [self._converters['col2idx'][c] for c in cols]
    raw = mu.sample(self._models, idxs, given=given, n=n)
    vals = du.convert_data(raw, cols, self._dtypes, self._converters,
                           to_val=True)

    # Collapse degenerate output shapes: single value -> scalar,
    # single sample -> 1-D row, otherwise the full 2-D array.
    if vals.shape == (1, 1):
        return vals[0, 0]
    if vals.shape[0] == 1:
        return vals[0, :]
    return vals