def gen_partition_from_weights(n_rows, n_cols, view_weights, clusters_weights):
    """Generate a random column-to-view and row-to-cluster partition.

    Every view index is placed once in the column assignment and, within each
    view, every cluster index is placed once in the row assignment. The
    remaining slots are filled with draws from ``utils.pflip`` on the
    corresponding weights, and each assignment is then shuffled.

    Args:
        n_rows: Number of rows. Must be at least the number of clusters in
            every view.
        n_cols: Number of columns. Must be at least ``len(view_weights)``.
        view_weights: One weight per view; passed to ``utils.pflip``.
        clusters_weights: Indexable by view; entry ``v`` holds one weight per
            cluster of view ``v``.

    Returns:
        Tuple ``(Zv, Zc)``. ``Zv`` is a list of ``n_cols`` view indices and
        ``Zc`` is a list with one entry per view, each a list of ``n_rows``
        cluster indices.

    Raises:
        AssertionError: If the generated partition does not have the requested
            dimensions (more views than columns, or more clusters than rows in
            any view).
    """
    n_views = len(view_weights)

    # Start with one column per view so that no view ends up empty.
    Zv = list(range(n_views))
    Zv.extend(utils.pflip(view_weights) for _ in range(n_cols - n_views))
    random.shuffle(Zv)
    assert len(Zv) == n_cols

    Zc = []
    for v in range(n_views):
        weights_v = clusters_weights[v]
        n_clusters = len(weights_v)

        # Start with one row per cluster so that no cluster ends up empty.
        Z = list(range(n_clusters))
        for _ in range(n_rows - n_clusters):
            # pflip is handed a fresh copy on every draw, so the caller's
            # weights stay untouched whatever pflip does with its argument.
            Z.append(utils.pflip(numpy.copy(weights_v)))
        random.shuffle(Z)
        Zc.append(Z)

    assert len(Zc) == n_views
    # Validate every view rather than only the first one: a later view with
    # more clusters than rows would otherwise slip through, and indexing
    # Zc[0] would fail with IndexError when there are no views at all.
    assert all(len(Z) == n_rows for Z in Zc)

    return Zv, Zc
def _gen_multinomial_data_column(Z, separation=.9, distargs=None): n_rows = len(Z) K = distargs['K'] if separation > .95: separation = .95 Tc = numpy.zeros(n_rows, dtype=int) C = max(Z)+1 theta_arrays = [numpy.random.dirichlet(numpy.ones(K)*(1.0-separation), 1) for _ in range(C)] for r in range(n_rows): cluster = Z[r] thetas = theta_arrays[cluster][0] x = int(utils.pflip(thetas)) Tc[r] = x return Tc
def _simple_predictive_sample_unobserved(state, cols, N=1): # get if in the same view views = [state.Zv[col] for col in cols] # get a list of the views V = list(set(views)) pK_dict = dict() for v in V: log_pK = get_cluster_crps(state, v) pK = numpy.exp(utils.log_normalize(log_pK)) pK_dict[v] = pK cluster_sets = dict() for col in cols: cluster_sets[col] = create_cluster_set(state, col) draws = [] i = 0 view_dict = dict() for v in V: which_cols = numpy.nonzero(views == v)[0] view_dict[v] = [col for col in which_cols] for _ in range(N): row_data = [] for v in V: cols_v = view_dict[v] k = utils.pflip(pK_dict[v]) for col in cols_v: x = cluster_sets[col][k].predictive_draw() row_data.append(x) draws.append(row_data) return draws
def predictive_draw(self):
    """Return the result of ``utils.pflip`` applied to ``w``.

    NOTE(review): ``w`` is neither a parameter nor an attribute of ``self``;
    it has to come from an enclosing or module-level scope that is not
    visible here -- confirm where it is bound.
    """
    drawn = utils.pflip(w)
    return drawn