def test_simple_nonconj():
    rng = irm.RNG()
    irm_model = irmio.create_model_from_data(data_simple_nonconj, rng=rng)
    irmio.set_model_latent(irm_model, latent_simple_nonconj, rng=rng)

    a = irm_model.domains['d1'].get_assignments()
    axes = irm_model.relations['R1'].get_axes()
    axes_objs = [(irm_model.domains[dn],
                  irm_model.domains[dn].get_relation_pos('R1'))
                 for dn in axes]
    comps = model.get_components_in_relation(axes_objs,
                                             irm_model.relations['R1'])
    g0 = a[0]
    g1 = a[2]
    g2 = a[4]
    assert_approx_equal(comps[g0, g0]['p'], 0.0)
    assert_approx_equal(comps[g0, g1]['p'], 0.01)
    assert_approx_equal(comps[g0, g2]['p'], 0.02)
    assert_approx_equal(comps[g1, g0]['p'], 0.1)
    assert_approx_equal(comps[g1, g1]['p'], 0.11)
    assert_approx_equal(comps[g1, g2]['p'], 0.12)
    assert_approx_equal(comps[g2, g0]['p'], 0.2)
    assert_approx_equal(comps[g2, g1]['p'], 0.21)
    assert_approx_equal(comps[g2, g2]['p'], 0.22)
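# For reference, a latent/data pair of the general shape these fixtures use,
# following the dicts built elsewhere in this file. The fixtures themselves
# are defined elsewhere; this sketch is illustrative only, and conn_matrix
# stands in for whatever observation matrix the fixture carries:
#
#   data_sketch = {'domains': {'d1': {'N': 6}},
#                  'relations': {'R1': {'relation': ('d1', 'd1'),
#                                       'model': 'BetaBernoulliNonConj',
#                                       'data': conn_matrix}}}
#   latent_sketch = {'domains': {'d1': {'assignment': [0, 0, 1, 1, 2, 2],
#                                       'hps': {'alpha': 1.0}}},
#                    'relations': {'R1': {'hps': {'alpha': 1.0, 'beta': 1.0}}}}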
def test_mixture():
    N = 100
    np.random.seed(0)

    # synthetic 1-d data: two well-separated normal components
    d = np.zeros(N, dtype=np.float32)
    for i in range(N / 2):
        d[i] = np.random.normal(-4, 1)
        d[i + N / 2] = np.random.normal(4, 1)
    d = np.random.permutation(d)

    desc = {'f1': {'data': d,
                   'model': 'NormalInverseChiSq'}}

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 10

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()

    for i in range(200):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

    new_latent = irm.irmio.get_latent(irm_model)
    a = new_latent['domains']['d1']['assignment']
    print irm.util.assign_to_counts(a)
    print new_latent['relations']['r_f1']['hps']
def run_bbconj(infilename, outfilename, seed):
    ITERS = SAMPLER_ITERS

    np.random.seed(seed)
    indata = pickle.load(open(infilename, 'r'))

    model_name = "BetaBernoulli"
    kc = irm.runner.default_kernel_config()

    data = indata['connectivity']
    irm_config = irm.irmio.default_graph_init(data, model_name)

    rng = irm.RNG()
    model = irm.irmio.model_from_config(irm_config, init='crp', rng=rng)

    scores = []
    states = []
    for i in range(ITERS):
        print "iteration", i
        irm.runner.do_inference(model, rng, kc)
        a = model.domains['t1'].get_assignments()
        scores.append(model.total_score())
        states.append(a)

    pickle.dump({'scores': scores,
                 'states': states,
                 'infile': infilename},
                open(outfilename, 'w'))
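# A hedged sketch of consuming the pickle that run_bbconj writes above. The
# key names come directly from the dump call; the filename is hypothetical:
#
#   out = pickle.load(open('run_bbconj.out', 'r'))
#   best_i = np.argmax(out['scores'])
#   best_assignment = out['states'][best_i]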
def test_io_score_t1t2():
    rng = irm.RNG()

    for D1_N, D2_N in [(10, 20), (20, 30), (200, 300)]:
        for model_name in ["BetaBernoulliNonConj", "LogisticDistance",
                           "LinearDistance"]:
            d = {'domains': {'d1': {'N': D1_N},
                             'd2': {'N': D2_N}},
                 'relations': {'R1': {'relation': ('d1', 'd2'),
                                      'model': model_name}}}
            l = {}
            new_latent, new_data = data.synth.prior_generate(l, d)

            irm_model = irmio.create_model_from_data(new_data, rng=rng)
            irmio.set_model_latent(irm_model, new_latent, rng=rng)
            s1 = irm_model.total_score()

            # round-trip the extracted latent through a fresh model and
            # check that the total score is reproduced
            extracted_latent = irmio.get_latent(irm_model)

            irm_model2 = irmio.create_model_from_data(new_data, rng=rng)
            irmio.set_model_latent(irm_model2, extracted_latent, rng=rng)
            s2 = irm_model2.total_score()
            np.testing.assert_approx_equal(s1, s2, 5)
def create_truth_bb(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)

    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(
            conn, process.THOLDS[THOLD_i])

        irm_latent, irm_data = irm.irmio.default_graph_init(
            conn_mat, 'BetaBernoulliNonConj')
        irm_latent['relations']['R1']['hps'] = {'alpha': 1.0, 'beta': 1.0}
        irm_latent['domains']['d1']['assignment'] = \
            irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)
        learned_latent = irm.irmio.get_latent(irm_model)

        pred = compute_prob_matrix(learned_latent, irm_data,
                                   model_name="BetaBernoulliNonConj")

        pickle.dump({'pred_mat': pred,
                     'truth_mat': irm_data['relations']['R1']['data'],
                     'thold_i': THOLD_i},
                    open(outfile, 'w'))
def create_truth(dbfile, outfiles):
    conn = sqlite3.connect(dbfile)

    for THOLD_i, outfile in zip(THOLDS, outfiles):
        cells, conn_mat, dist_mats = preprocess.create_data(
            conn, process.THOLDS[THOLD_i])

        irm_latent, irm_data = models.create_conn_dist_lowlevel(
            conn_mat, dist_mats, 'xyz', model_name="LogisticDistance")
        irm_latent['relations']['R1']['hps'] = {'lambda_hp': 50.0,
                                                'mu_hp': 50.0,
                                                'p_max': 0.9,
                                                'p_min': 0.01}
        irm_latent['domains']['d1']['assignment'] = \
            irm.util.canonicalize_assignment(cells['type_id'])

        irm_model = irm.irmio.create_model_from_data(irm_data)
        rng = irm.RNG()
        irm.irmio.set_model_latent(irm_model, irm_latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=40)
        learned_latent = irm.irmio.get_latent(irm_model)

        pred = compute_prob_matrix(learned_latent, irm_data)

        pickle.dump({'pred_mat': pred,
                     'truth_mat': irm_data['relations']['R1']['data']['link'],
                     'thold_i': THOLD_i},
                    open(outfile, 'w'))
def test_slice_normal():

    def dens(x):
        # log-density of a two-component gaussian mixture
        mus = [-1.5, 2]
        variances = [1, 1]
        pis = [0.25, 0.75]
        return np.logaddexp.reduce(
            [(np.log(pi) + util.log_norm_dens(x, mu, var))
             for (pi, mu, var) in zip(pis, mus, variances)])
        # return util.log_norm_dens(x, 0, 1.0)

    rng = irm.RNG()

    ITERS = 100000
    x = 0
    results = np.zeros(ITERS)
    for i in range(ITERS):
        x = irm.slice_sample(x, dens, rng, 50.0)
        results[i] = x

    # compare the sample histogram against the normalized target density
    MIN = -5
    MAX = 5
    BINS = 100
    x = np.linspace(MIN, MAX, BINS)
    bin_width = x[1] - x[0]
    y = [dens(a + bin_width / 2) for a in x[:-1]]
    p = np.exp(y)
    p = p / np.sum(p) / bin_width

    hist, bin_edges = np.histogram(results, x, normed=True)

    kl = util.kl(hist, p)
    assert kl < 0.1
def test_simple_nonconj_inout():
    rng = irm.RNG()
    irm_model = irmio.create_model_from_data(data_simple_nonconj, rng=rng)
    irmio.set_model_latent(irm_model, latent_simple_nonconj, rng=rng)

    latent = irmio.get_latent(irm_model)
    irmio.latent_equality(latent_simple_nonconj, latent, data_simple_nonconj)
def run_inference(infile, outfile):
    data = pickle.load(open(infile, 'r'))
    df = data['featuredf']
    df_vals = df[np.isfinite(df['contact_x_mean'])][:100]
    N = len(df_vals)

    desc = {'soma_x': {'data': to_f32(df_vals['soma_x']),
                       'model': 'NormalInverseChiSq'},
            # 'contact_spatial_std' : {'data' : to_f32(df_vals['contact_spatial_std']),
            #                          'model' : 'NormalInverseChiSq'},
            }

    # one NormalInverseChiSq feature per contact-area histogram bin
    for i, bi in enumerate(features.BINS[:-1]):
        a = np.array([row['contact_area_hist'][i]
                      for row_i, row in df_vals.iterrows()],
                     dtype=np.float32)
        print a
        desc['contact_x_hist_%d' % i] = {'data': a,
                                         'model': 'NormalInverseChiSq'}

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 40

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()
    kernel_config[0][1]['subkernels'][-1][1]['grids']['NormalInverseChiSq'] = \
        irm.gridgibbshps.default_grid_normal_inverse_chi_sq(mu_scale=10,
                                                            var_scale=1,
                                                            GRIDN=10)
    kernel_config[0][1]['subkernels'][-1][1]['grids']['r_soma_x'] = \
        soma_x_hp_grid()

    MAX_ITERS = 200
    for i in range(MAX_ITERS):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)
        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']
        print irm.util.assign_to_counts(a)
        print "i=", i, "MAX_ITERS=", MAX_ITERS

    pickle.dump({'assignment': a,
                 'latent': new_latent,
                 'data': data},
                open(outfile, 'w'))
def create_init(latent_filename, data_filename, out_filenames,
                init=None, keep_ground_truth=True):
    """
    CONVENTION: when we create N inits, the first is actually initialized
    from the "ground truth" of the initial init (whatever that happened
    to be)

    # FIXME : add ability to init multiple domains
    """
    irm_latent = pickle.load(open(latent_filename, 'r'))
    irm_data = pickle.load(open(data_filename, 'r'))

    irm_latents = []

    rng = irm.RNG()

    irm_model = irm.irmio.create_model_from_data(irm_data, rng=rng)

    for c, out_f in enumerate(out_filenames):
        print "generating init", out_f
        np.random.seed(c)

        latent = copy.deepcopy(irm_latent)

        d_N = len(latent['domains']['d1']['assignment'])
        if init['type'] == 'fixed':
            group_num = init['group_num']
            a = np.arange(d_N) % group_num
            a = np.random.permutation(a)
        elif init['type'] == 'crp':
            alpha = init['alpha']
            a = irm.util.crp_draw(d_N, alpha)
            a = np.random.permutation(a)
        elif init['type'] == 'truth':
            a = latent['domains']['d1']['assignment']
        else:
            raise NotImplementedError("Unknown init type")

        if (not keep_ground_truth) or (c > 0):  # first one stays the same
            latent['domains']['d1']['assignment'] = a

        # recompute the suffstats in light of the new assignment
        irm.irmio.set_model_latent(irm_model, latent, rng)

        print "estimating suffstats for %s" % out_f
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=2)
        print "ss estimation done for ", out_f

        pickle.dump(irm.irmio.get_latent(irm_model), open(out_f, 'w'))
def create_init_pure(irm_latent, irm_data, OUT_N,
                     init=None, keep_ground_truth=True):
    """
    CONVENTION: when we create N inits, the first is actually initialized
    from the "ground truth" of the initial init (whatever that happened
    to be)

    # FIXME : add ability to init multiple domains
    """
    irm_latents = []

    rng = irm.RNG()

    irm_model = irm.irmio.create_model_from_data(irm_data, rng=rng)

    for c in range(OUT_N):
        np.random.seed(c)

        latent = copy.deepcopy(irm_latent)

        d_N = len(latent['domains']['d1']['assignment'])
        if init['type'] == 'fixed':
            group_num = init['group_num']
            a = np.arange(d_N) % group_num
            a = np.random.permutation(a)
        elif init['type'] == 'crp':
            alpha = init['alpha']
            a = irm.util.crp_draw(d_N, alpha)
            a = np.random.permutation(a)
        elif init['type'] == 'truth':
            a = latent['domains']['d1']['assignment']
        else:
            raise NotImplementedError("Unknown init type")

        if (not keep_ground_truth) or (c > 0):  # first one stays the same
            latent['domains']['d1']['assignment'] = a

        # recompute the suffstats in light of the new assignment
        irm.irmio.set_model_latent(irm_model, latent, rng)
        irm.irmio.estimate_suffstats(irm_model, rng, ITERS=2)

        yield irm.irmio.get_latent(irm_model)
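# A minimal usage sketch for create_init_pure, assuming a latent/data pair
# of the shape used throughout this file; the alpha value is illustrative,
# not a recommendation:
#
#   inits = list(create_init_pure(irm_latent, irm_data, OUT_N=8,
#                                 init={'type': 'crp', 'alpha': 1.0}))
#   # inits[0] preserves the ground-truth assignment (keep_ground_truth=True)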
def run_ld(infilename, outfilename, seed):
    ITERS = SAMPLER_ITERS

    np.random.seed(seed)
    indata = pickle.load(open(infilename, 'r'))

    model_name = "LogisticDistance"
    kc = irm.runner.default_kernel_nonconj_config()
    kc[0][1]['M'] = 30

    data = indata['conn_and_dist']
    irm_config = irm.irmio.default_graph_init(data, model_name)

    HPS = {'mu_hp': 1.0,
           'lambda_hp': 1.0,
           'p_min': 0.1,
           'p_max': 0.9}
    irm_config['relations']['R1']['hps'] = HPS

    rng = irm.RNG()
    model = irm.irmio.model_from_config(irm_config, init='crp', rng=rng)

    rel = model.relations['R1']
    doms = [(model.domains['t1'], 0), (model.domains['t1'], 0)]

    scores = []
    states = []
    comps = []
    for i in range(ITERS):
        print "iteration", i
        irm.runner.do_inference(model, rng, kc)
        a = model.domains['t1'].get_assignments()
        components = irm.model.get_components_in_relation(doms, rel)
        scores.append(model.total_score())
        states.append(a)
        comps.append(components)

    # save the full per-iteration component history, not just the last one
    pickle.dump({'scores': scores,
                 'states': states,
                 'components': comps,
                 'infile': infilename,
                 'hps': HPS},
                open(outfilename, 'w'))
def test_mixture_bb():
    ENTITY_PER_GROUP = 50
    GROUPS = 4
    N = ENTITY_PER_GROUP * GROUPS
    DIM = 4
    np.random.seed(0)

    # per-group Bernoulli probability for each binary feature
    gv = np.random.beta(0.2, 0.2, size=(GROUPS, DIM))

    mat = np.zeros((N, DIM), dtype=np.uint8)
    for g in range(GROUPS):
        for i in range(ENTITY_PER_GROUP):
            for d in range(DIM):
                mat[g * ENTITY_PER_GROUP + i, d] = np.random.rand() < gv[g, d]
    # mat = np.random.permutation(mat)

    desc = {}
    for d in range(DIM):
        desc['f%d' % d] = {'data': mat[:, d],
                           'model': 'BetaBernoulli'}

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 10

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal()

    for i in range(150):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)

    new_latent = irm.irmio.get_latent(irm_model)
    a = new_latent['domains']['d1']['assignment']
    print irm.util.assign_to_counts(a)
    print new_latent['relations']['r_f1']['hps']

    pylab.imshow(mat[np.argsort(a)])
    pylab.show()
def test_slice_exp():
    """
    Test on a distribution with support on the positive reals
    """

    def dens(x):
        # exponential log-density with rate lamb, -inf below zero
        lamb = 2.47
        if x < 0:
            return -np.inf
        else:
            return -x * lamb

    rng = irm.RNG()

    ITERS = 1000000
    x = 0
    results = np.zeros(ITERS)
    for i in range(ITERS):
        x = irm.slice_sample(x, dens, rng, 0.5)
        results[i] = x

    # compare the sample histogram against the normalized target density
    MIN = -1
    MAX = 4
    BINS = 101
    x = np.linspace(MIN, MAX, BINS)
    bin_width = x[1] - x[0]
    y = [dens(a + bin_width / 2) for a in x[:-1]]
    p = np.exp(y)
    p = p / np.sum(p) / bin_width

    hist, bin_edges = np.histogram(results, x, normed=True)

    kl = util.kl(hist, p)
    assert kl < 0.1
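# test_slice_normal and test_slice_exp above share the same validation
# pattern: draw many samples, histogram them, and compare against the
# normalized target density with a KL threshold. A sketch of that pattern
# factored into a reusable helper; util.kl is assumed from its use in
# those tests, everything else is plain numpy:
def check_sampler_against_density(samples, log_dens, lo, hi,
                                  bins=100, kl_thold=0.1):
    edges = np.linspace(lo, hi, bins)
    width = edges[1] - edges[0]
    # evaluate the log-density at each bin center, then normalize
    p = np.exp([log_dens(e + width / 2) for e in edges[:-1]])
    p = p / np.sum(p) / width
    hist, _ = np.histogram(samples, edges, normed=True)
    assert util.kl(hist, p) < kl_thold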
def test_parallel_tempering():
    rng = irm.RNG()

    D1_N = 100
    model_name = "BetaBernoulliNonConj"
    d = {'domains': {'d1': {'N': D1_N}},
         'relations': {'R1': {'relation': ('d1', 'd1'),
                              'model': model_name}}}
    l = {}
    new_latent, new_data = data.synth.prior_generate(l, d)

    config = [('parallel_tempering',
               {'temps': [1.0, 2.0, 4.0, 8.0],
                'subkernels': runner.default_kernel_nonconj_config()})]

    r = runner.Runner(new_latent, new_data, config)

    for i in range(100):
        print "tt", i, r.get_score()
        r.run_iters(1)
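# The kernel config above is a list of (kernel_name, params) pairs: the
# parallel_tempering kernel runs its subkernels at each temperature in
# 'temps' and exchanges states between chains. A hedged variant with a
# denser temperature ladder (values are illustrative, not tuned):
#
#   config = [('parallel_tempering',
#              {'temps': [1.0, 1.5, 2.25, 3.4, 5.0, 8.0],
#               'subkernels': runner.default_kernel_nonconj_config()})]
#   r = runner.Runner(new_latent, new_data, config)
#   r.run_iters(10)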
def test_io():
    N = 10

    desc = {'f1': {'data': np.zeros(N, dtype=np.bool),
                   'model': 'BetaBernoulli'}}

    latent, data = connattribio.create_mm(desc)
    latent, data = irm.data.synth.prior_generate(latent, data)
    print data
    print latent

    assert_equal(len(latent['domains']), 2)
    assert_equal(len(latent['relations']), 1)
    assert_equal(len(data['domains']), 2)
    assert_equal(len(data['relations']), 1)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)
def test_set_components():
    T1_N = 10
    T2_N = 20

    np.random.seed(0)
    rng = irm.RNG()

    data = np.random.rand(T1_N, T2_N) > 0.5
    data.shape = T1_N, T2_N

    m = models.BetaBernoulli()
    r = Relation([('T1', T1_N), ('T2', T2_N)], data, m)
    hps = m.create_hps()
    hps['alpha'] = 1.0
    hps['beta'] = 1.0
    r.set_hps(hps)

    tf_1 = model.DomainInterface(T1_N, {'r': ('T1', r)})
    tf_1.set_hps({'alpha': 1.0})
    tf_2 = model.DomainInterface(T2_N, {'r': ('T2', r)})
    tf_2.set_hps({'alpha': 1.0})

    # assign T1 entities round-robin to T1_GRPN groups
    T1_GRPN = 4
    t1_assign = np.arange(T1_N) % T1_GRPN
    t1_grps = {}
    for i, gi in enumerate(t1_assign):
        if gi not in t1_grps:
            g = tf_1.create_group(rng)
            t1_grps[gi] = g
        tf_1.add_entity_to_group(t1_grps[gi], i)

    # assign T2 entities round-robin to T2_GRPN groups
    T2_GRPN = 4
    t2_assign = np.arange(T2_N) % T2_GRPN
    t2_grps = {}
    for i, gi in enumerate(t2_assign):
        if gi not in t2_grps:
            g = tf_2.create_group(rng)
            t2_grps[gi] = g
        tf_2.add_entity_to_group(t2_grps[gi], i)

    t1_assign_g = tf_1.get_assignments()
    t2_assign_g = tf_2.get_assignments()

    allmodel = model.IRM({'T1': tf_1, 'T2': tf_2}, {'R1': r})
    lastscore = allmodel.total_score()

    for t1_g in np.unique(t1_assign_g):
        for t2_g in np.unique(t2_assign_g):
            t1_entities = np.argwhere(t1_assign_g == t1_g).flatten()
            t2_entities = np.argwhere(t2_assign_g == t2_g).flatten()
            dps = []
            for e1 in t1_entities:
                for e2 in t2_entities:
                    dps.append(data[e1, e2])
            heads = np.sum(np.array(dps) == 1)
            tails = np.sum(np.array(dps) == 0)

            # check that the current sufficient statistics are correct
            c = r.get_component((tf_1.get_relation_groupid(0, t1_g),
                                 tf_2.get_relation_groupid(0, t2_g)))
            assert_equal(heads, c['heads'])
            assert_equal(tails, c['tails'])

            # now perturb the suffstats and check the score changes each time
            c = r.set_component((tf_1.get_relation_groupid(0, t1_g),
                                 tf_2.get_relation_groupid(0, t2_g)),
                                {'heads': int(heads),
                                 'tails': int(tails) + 1})
            assert allmodel.total_score() != lastscore
            lastscore = allmodel.total_score()

            c = r.set_component((tf_1.get_relation_groupid(0, t1_g),
                                 tf_2.get_relation_groupid(0, t2_g)),
                                {'heads': int(heads) + 1,
                                 'tails': int(tails) + 1})
            assert allmodel.total_score() != lastscore
            lastscore = allmodel.total_score()
def score_params():
    for a in (list(t1_t2_datasets()) + list(t1_t1_datasets())):
        latent_filename = a[1][0]
        data_filename = a[1][1]
        outfilename = latent_filename[:-len("latent")] + 'scores'
        if 'conj' in latent_filename:
            yield (latent_filename, data_filename), outfilename

@follows(t1_t2_datasets)
@follows(t1_t1_datasets)
@files(score_params)
def score((latent_filename, data_filename), outfilename):
    latent = pickle.load(open(latent_filename, 'r'))
    data = pickle.load(open(data_filename, 'r'))

    rng = irm.RNG()
    irm_model = irmio.create_model_from_data(data, rng=rng)
    irmio.set_model_latent(irm_model, latent, rng)

    # now we go through and score every possible latent
    domain_names = sorted(data['domains'].keys())
    domain_sizes = [data['domains'][dn]['N'] for dn in domain_names]

    # enumerate every candidate partition over the domains
    candidate_partitions = list(putil.enumerate_possible_latents(domain_sizes))
    CANDIDATE_N = len(candidate_partitions)

    scores = {}
    for cpi, cp in enumerate(candidate_partitions):
        t1 = time.time()
        for di, av in enumerate(cp):
def cluster_z_matrix(z, INIT_GROUPS=100, crp_alpha=5.0, beta=0.1,
                     ITERS=4, method='dpmm_bb'):
    N = len(z)

    # create the data
    if method == 'dpmm_bb':
        model = "BetaBernoulli"
        assert z.dtype == np.bool
        hps = {'alpha': beta, 'beta': beta}
    elif method == "dpmm_gp":
        model = "GammaPoisson"
        assert z.dtype == np.uint32
        hps = {'alpha': 2.0, 'beta': 2.0}
    else:
        raise NotImplementedError("unknown method")

    data = {'domains': {'d1': {'N': N}},
            'relations': {'R1': {'relation': ('d1', 'd1'),
                                 'model': model,
                                 'data': z}}}
    latent_init = {'domains': {'d1': {'assignment': np.arange(N) % INIT_GROUPS,
                                      'hps': {'alpha': crp_alpha}}},
                   'relations': {'R1': {'hps': hps}}}

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent_init, rng=rng)

    run = irm.runner.Runner(latent_init, data,
                            irm.runner.default_kernel_config())
    run.run_iters(ITERS)
    state = run.get_state()
    return irm.util.canonicalize_assignment(
        state['domains']['d1']['assignment'])
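# Usage sketch for cluster_z_matrix: the default 'dpmm_bb' method expects
# a boolean matrix (the z below is synthetic, for illustration only):
#
#   z = np.random.rand(50, 50) > 0.5
#   assignment = cluster_z_matrix(z, INIT_GROUPS=10, ITERS=4)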
def run_inference_cxl(infile, outfile):
    np.random.seed(1)
    data = pickle.load(open(infile, 'r'))
    df = data['featuredf']
    df_vals = df[np.isfinite(df['contact_x_mean'])]
    N = len(df_vals)

    # convert into a real list of lists
    contact_x_list = np.zeros(
        N, dtype=irm.models.MixtureModelDistribution().data_dtype())

    for xi, x in enumerate(df_vals['contact_x_list']):
        # in the event of > 1024 we randomly pick 1024
        y = np.array(x)
        x_min = features.BINS[0]
        x_max = features.BINS[-1]
        y = (y - x_min) / (x_max - x_min)  # normed to [0, 1]
        contact_x_list[xi]['points'][:len(y)] = y
        contact_x_list[xi]['len'] = len(y)

    desc = {'soma_x': {'data': to_f32(df_vals['soma_x']),
                       'model': 'NormalInverseChiSq'},
            # 'contact_spatial_std' : {'data' : to_f32(df_vals['contact_spatial_std']),
            #                          'model' : 'NormalInverseChiSq'},
            'contact_x_list': {'data': contact_x_list,
                               'model': 'MixtureModelDistribution'}}

    latent, data = connattribio.create_mm(desc)
    latent['domains']['d1']['assignment'] = np.arange(N) % 40

    COMP_K = 4
    latent['relations']['r_contact_x_list']['hps'] = {'comp_k': COMP_K,
                                                      'var_scale': 0.1,
                                                      'dir_alpha': 1.0}

    latent, data = irm.data.synth.prior_generate(latent, data)

    rng = irm.RNG()
    irm_model = irm.irmio.create_model_from_data(data, rng=rng)
    irm.irmio.set_model_latent(irm_model, latent, rng)

    kernel_config = irm.runner.default_kernel_anneal(start_temp=64,
                                                     iterations=250)
    kernel_config[0][1]['subkernels'][-1][1]['grids']['r_soma_x'] = \
        soma_x_hp_grid()
    kernel_config[0][1]['subkernels'][-1][1]['grids']['MixtureModelDistribution'] = \
        [{'comp_k': 4, 'dir_alpha': 1.0, 'var_scale': 0.1}]

    MAX_ITERS = 400
    for i in range(MAX_ITERS):
        irm.runner.do_inference(irm_model, rng, kernel_config, i)
        new_latent = irm.irmio.get_latent(irm_model)
        a = new_latent['domains']['d1']['assignment']
        print irm.util.assign_to_counts(a)
        print "i=", i, "MAX_ITERS=", MAX_ITERS

    pickle.dump({'assignment': a,
                 'latent': new_latent,
                 'data': data},
                open(outfile, 'w'))
def test_slice_nonconj():
    T1_N = 10
    T2_N = 20

    np.random.seed(0)
    rng = irm.RNG()

    data = np.random.rand(T1_N, T2_N) > 0.5
    data.shape = T1_N, T2_N

    m = models.BetaBernoulliNonConj()
    r = irm.Relation([('T1', T1_N), ('T2', T2_N)], data, m)
    hps = m.create_hps()
    hps['alpha'] = 1.0
    hps['beta'] = 1.0
    r.set_hps(hps)

    tf_1 = model.DomainInterface(T1_N, {'r': ('T1', r)})
    tf_1.set_hps({'alpha': 1.0})
    tf_2 = model.DomainInterface(T2_N, {'r': ('T2', r)})
    tf_2.set_hps({'alpha': 1.0})

    T1_GRPN = 4
    t1_assign = np.arange(T1_N) % T1_GRPN
    t1_grps = {}
    for i, gi in enumerate(t1_assign):
        if gi not in t1_grps:
            g = tf_1.create_group(rng)
            t1_grps[gi] = g
        tf_1.add_entity_to_group(t1_grps[gi], i)

    T2_GRPN = 4
    t2_assign = np.arange(T2_N) % T2_GRPN
    t2_grps = {}
    for i, gi in enumerate(t2_assign):
        if gi not in t2_grps:
            g = tf_2.create_group(rng)
            t2_grps[gi] = g
        tf_2.add_entity_to_group(t2_grps[gi], i)

    t1_assign_g = tf_1.get_assignments()
    t2_assign_g = tf_2.get_assignments()

    # build a table of (heads, tails) counts per group-pair coordinate
    coord_data = {}
    for t1_g in np.unique(t1_assign_g):
        for t2_g in np.unique(t2_assign_g):
            t1_entities = np.argwhere(t1_assign_g == t1_g).flatten()
            t2_entities = np.argwhere(t2_assign_g == t2_g).flatten()
            dps = []
            for e1 in t1_entities:
                for e2 in t2_entities:
                    dps.append(data[e1, e2])
            heads = np.sum(np.array(dps) == 1)
            tails = np.sum(np.array(dps) == 0)
            # coords = ((tf_1.get_relation_groupid(0, t1_g),
            #            tf_2.get_relation_groupid(0, t2_g)))
            coord_data[(t1_g, t2_g)] = (heads, tails)

    # slice-sample the per-component p under several hyperparameter settings
    # and compare the empirical mean against the analytic posterior mean
    for alpha, beta in [(1.0, 1.0), (10.0, 1.0), (1.0, 10.0), (0.1, 5.0)]:
        coords_hist = {k: [] for k in coord_data}
        print "alpha=", alpha, "beta=", beta, "=" * 50
        hps['alpha'] = alpha
        hps['beta'] = beta
        r.set_hps(hps)

        ITERS = 100000
        for i in range(ITERS):
            r.apply_comp_kernel("slice_sample", rng, {'width': 0.4})
            component_data = model.get_components_in_relation(
                [(tf_1, 0), (tf_2, 0)], r)
            for c in coord_data:
                coords_hist[c].append(component_data[c]['p'])

        for c in coords_hist:
            heads, tails = coord_data[c]
            empirical_p = np.mean(coords_hist[c])
            true_map_p = float(heads + alpha) / (heads + tails + alpha + beta)
            print empirical_p - true_map_p
            np.testing.assert_approx_equal(empirical_p, true_map_p, 2)