def test_dependence_probability_pairwise():
    """Pairwise dependence probabilities must match the Zv fixed at init.

    The engine is built with an explicit column-to-view assignment, so for
    every state the dependence probability of a column pair is exactly 1
    when the columns share a view and 0 otherwise.
    """
    cctypes, distargs = cu.parse_distargs(['normal', 'normal', 'normal'])
    T, Zv, _Zc = tu.gen_data_table(
        10, [.5, .5], [[.25, .25, .5], [.3, .7]], cctypes, distargs,
        [.95] * len(cctypes), rng=gu.gen_rng(100))
    outputs = [0, 1, 2]
    engine = Engine(
        T.T,
        outputs=outputs,
        cctypes=cctypes,
        num_states=4,
        distargs=distargs,
        Zv={output: view for output, view in zip(outputs, Zv)},
        rng=gu.gen_rng(0))

    Ds = engine.dependence_probability_pairwise(multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (len(outputs), len(outputs)) for D in Ds)
    # With Zv pinned at construction, dependence reduces to view equality.
    for D in Ds:
        for col0, col1 in itertools.product(outputs, repeat=2):
            index0 = outputs.index(col0)
            index1 = outputs.index(col1)
            assert D[index0, index1] == (Zv[index0] == Zv[index1])

    # Restricting to a column subset shrinks the matrices accordingly.
    Ds = engine.dependence_probability_pairwise(colnos=[0, 2], multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (2, 2) for D in Ds)
def test_two_views_column_partition_normal__ci_(lovecat):
    """Run inference on the two-view normal dataset and plot clustermaps.

    The dependence-probability heatmap should visually match the block
    structure in ``expected_dependence`` below; this test is for manual
    inspection rather than automated assertion.
    """
    D = retrieve_normal_dataset()
    engine = Engine(
        D.T,
        outputs=[5, 0, 1, 2, 3, 4],
        cctypes=['normal'] * len(D),
        rng=gu.gen_rng(12),
        num_states=64)

    # Lovecat is the alternative (C++) inference backend.
    if lovecat:
        engine.transition_lovecat(N=200)
    else:
        engine.transition(N=200)

    dep_prob = engine.dependence_probability_pairwise()
    row_sim_view1 = engine.row_similarity_pairwise(cols=[5, 0, 1])
    row_sim_view2 = engine.row_similarity_pairwise(cols=[2, 3, 4])
    pu.plot_clustermap(dep_prob)
    pu.plot_clustermap(row_sim_view1)
    pu.plot_clustermap(row_sim_view2)

    # Theoretical block structure of the dependence matrix (two views of
    # three columns each), for eyeballing against the plotted heatmap.
    expected_dependence = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    D, Zv, Zc = tu.gen_data_table(
        n_rows=150,
        view_weights=None,
        cluster_weights=[
            [.2, .2, .2, .4],
            [.3, .2, .5],
        ],
        cctypes=['normal'] * 6,
        distargs=[None] * 6,
        separation=[0.95] * 6,
        view_partition=[0, 0, 0, 1, 1, 1],
        rng=gu.gen_rng(12))
    engine = Engine(
        D.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    # Inference should strictly improve the average model score.
    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=100)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    dependence_probability = numpy.mean(
        engine.dependence_probability_pairwise(), axis=0)

    # Columns 0-2 and columns 3-5 were generated in the same view:
    # within-view pairs must look dependent, across-view pairs must not.
    first_view, second_view = (0, 1, 2), (3, 4, 5)
    for view in (first_view, second_view):
        for i, j in itertools.combinations(view, 2):
            assert dependence_probability[i, j] > 0.8
    for i in first_view:
        for j in second_view:
            assert dependence_probability[i, j] < 0.2
def launch_analysis():
    """Train an Engine on the animals dataset, round-trip it through a
    pickle on disk, and plot the pairwise dependence-probability clustermap.

    Side effects: writes ``resources/animals/animals.engine`` and opens a
    plot window via ``pu.plot_clustermap``.
    """
    engine = Engine(
        animals.values.astype(float),
        num_states=64,
        cctypes=['categorical'] * len(animals.values[0]),
        distargs=[{'k': 2}] * len(animals.values[0]),
        rng=gu.gen_rng(7))
    engine.transition(N=900)
    # Pickle streams are binary: the original 'w'/'r' text modes corrupt
    # the data (and raise on Python 3), so open in 'wb'/'rb'.
    with open('resources/animals/animals.engine', 'wb') as f:
        engine.to_pickle(f)
    # Use a context manager so the read handle is closed promptly (the
    # original leaked the file object returned by open()).
    with open('resources/animals/animals.engine', 'rb') as f:
        engine = Engine.from_pickle(f)
    D = engine.dependence_probability_pairwise()
    pu.plot_clustermap(D)
def test_two_views_column_partition_bernoulli__ci_(lovecat):
    """Run inference on the two-view bernoulli dataset and plot clustermaps.

    Binary columns are modeled as categorical with k=2.  The dependence
    heatmap should visually match ``expected_dependence``; this test is for
    manual inspection rather than automated assertion.
    """
    D = retrieve_bernoulli_dataset()
    engine = Engine(
        D.T,
        cctypes=['categorical'] * len(D),
        distargs=[{'k': 2}] * len(D),
        rng=gu.gen_rng(12),
        num_states=64)

    # Lovecat is the alternative (C++) inference backend.
    if lovecat:
        engine.transition_lovecat(N=200)
    else:
        engine.transition(N=200)

    dep_prob = engine.dependence_probability_pairwise()
    row_sim_view1 = engine.row_similarity_pairwise(cols=[0, 1])
    row_sim_view2 = engine.row_similarity_pairwise(cols=[2, 3])
    pu.plot_clustermap(dep_prob)
    pu.plot_clustermap(row_sim_view1)
    pu.plot_clustermap(row_sim_view2)

    # Theoretical block structure of the dependence matrix, for eyeballing
    # against the plotted heatmap.
    expected_dependence = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
N_ROWS = 300
N_STATES = 12
N_ITERS = 100

# One id column plus eight normal columns: four drawn from a single
# cluster, four drawn from four separated clusters.
cctypes = ['categorical(k={})'.format(N_ROWS)] + ['normal'] * 8
cctypes, distargs = cu.parse_distargs(cctypes)
column_names = ['id'] + ['one cluster'] * 4 + ['four cluster'] * 4

X = np.zeros((N_ROWS, 9))

# id column: a unique label per row.
X[:, 0] = np.arange(N_ROWS)

# Four columns of one cluster from the standard normal.
X[:, 1:5] = np.random.randn(N_ROWS, 4)

# Four columns of four clusters with unit variance and means 4*z for
# z in {0, 1, 2, 3} (clusters separated by 4).
Z = np.random.randint(4, size=(N_ROWS))
X[:, 5:] = 4 * Z[:, np.newaxis] + np.random.randn(N_ROWS, 4)

# Inference.
engine = Engine(
    X,
    cctypes=cctypes,
    distargs=distargs,
    num_states=N_STATES)
engine.transition(N=N_ITERS)

# Dependence probability, rendered as a labeled clustermap.
D = engine.dependence_probability_pairwise()
zmat = sns.clustermap(D, yticklabels=column_names, xticklabels=column_names)
plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
plt.show()