Exemplo n.º 1
0
def test_dependence_probability_pairwise():
    """Dependence probabilities must exactly match a fixed view partition.

    The engine is constructed with the true column-to-view assignment ``Zv``,
    so pairwise dependence is deterministic: two columns are dependent
    precisely when they live in the same view.
    """
    cctypes, distargs = cu.parse_distargs(['normal', 'normal', 'normal'])

    T, Zv, _Zc = tu.gen_data_table(
        10, [.5, .5], [[.25, .25, .5], [.3, .7]], cctypes, distargs,
        [.95] * len(cctypes), rng=gu.gen_rng(100))

    outputs = [0, 1, 2]
    view_assignment = {output: view for output, view in zip(outputs, Zv)}
    engine = Engine(
        T.T,
        outputs=outputs,
        cctypes=cctypes,
        num_states=4,
        distargs=distargs,
        Zv=view_assignment,
        rng=gu.gen_rng(0))

    dep_matrices = engine.dependence_probability_pairwise(multiprocess=0)
    assert len(dep_matrices) == engine.num_states()
    n_cols = len(outputs)
    assert all(np.shape(D) == (n_cols, n_cols) for D in dep_matrices)
    for D in dep_matrices:
        for col0, col1 in itertools.product(outputs, outputs):
            row = outputs.index(col0)
            col = outputs.index(col1)
            # Same view <=> dependent, with probability exactly 0 or 1.
            assert D[row, col] == (Zv[row] == Zv[col])

    # Restricting to a column subset shrinks the matrices accordingly.
    dep_matrices = engine.dependence_probability_pairwise(
        colnos=[0, 2], multiprocess=0)
    assert len(dep_matrices) == engine.num_states()
    assert all(np.shape(D) == (2, 2) for D in dep_matrices)
Exemplo n.º 2
0
def test_two_views_column_partition_normal__ci_(lovecat):
    """Smoke test: fit a normal dataset and plot dependence/similarity maps.

    Runs either the lovecat or the native transition kernel, then renders
    clustermaps of pairwise column dependence and row similarity. Returns the
    fitted engine for interactive inspection.
    """
    dataset = retrieve_normal_dataset()

    engine = Engine(
        dataset.T,
        outputs=[5, 0, 1, 2, 3, 4],
        cctypes=['normal'] * len(dataset),
        rng=gu.gen_rng(12),
        num_states=64)

    # Select the inference backend requested by the fixture.
    transition = engine.transition_lovecat if lovecat else engine.transition
    transition(N=200)

    dep_prob = engine.dependence_probability_pairwise()
    sim_block1 = engine.row_similarity_pairwise(cols=[5, 0, 1])
    sim_block2 = engine.row_similarity_pairwise(cols=[2, 3, 4])

    pu.plot_clustermap(dep_prob)
    pu.plot_clustermap(sim_block1)
    pu.plot_clustermap(sim_block2)

    # Expected block structure of the dependence matrix, kept here as a
    # visual reference for eyeballing the plotted clustermap.
    P_THEORY = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
Exemplo n.º 3
0
def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    # Synthetic table with a known two-view partition: columns 0-2 in one
    # view, columns 3-5 in the other.
    D, Zv, Zc = tu.gen_data_table(
        n_rows=150,
        view_weights=None,
        cluster_weights=[
            [.2, .2, .2, .4],
            [.3, .2, .5],
        ],
        cctypes=['normal'] * 6,
        distargs=[None] * 6,
        separation=[0.95] * 6,
        view_partition=[0, 0, 0, 1, 1, 1],
        rng=gu.gen_rng(12))

    engine = Engine(
        D.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    # Inference should strictly improve the average model score.
    logscore_before = engine.logpdf_score()
    engine.transition_loom(N=100)
    logscore_after = engine.logpdf_score()
    assert numpy.mean(logscore_after) > numpy.mean(logscore_before)

    # Dependence probability averaged over the 20 posterior states.
    dep_prob = numpy.mean(
        engine.dependence_probability_pairwise(), axis=0)

    view_a = (0, 1, 2)
    view_b = (3, 4, 5)

    # Within-view pairs must be detected as strongly dependent.
    for view in (view_a, view_b):
        for k, i in enumerate(view):
            for j in view[k + 1:]:
                assert dep_prob[i, j] > 0.8

    # Across-view pairs must be detected as nearly independent.
    for i in view_a:
        for j in view_b:
            assert dep_prob[i, j] < 0.2
Exemplo n.º 4
0
def launch_analysis():
    """Fit a 64-state ensemble on the animals dataset, persist it to disk,
    reload it, and plot the pairwise column dependence probabilities.

    Side effects: writes ``resources/animals/animals.engine`` and opens a
    clustermap plot window.
    """
    n_cols = len(animals.values[0])
    engine = Engine(animals.values.astype(float),
                    num_states=64,
                    cctypes=['categorical'] * n_cols,
                    distargs=[{
                        'k': 2
                    }] * n_cols,
                    rng=gu.gen_rng(7))

    engine.transition(N=900)

    # BUG FIX: pickle is a binary protocol -- the file must be opened in
    # binary mode ('wb'/'rb'); text mode raises TypeError on Python 3.
    with open('resources/animals/animals.engine', 'wb') as f:
        engine.to_pickle(f)

    # Use a context manager so the read handle is closed deterministically
    # (the original leaked the handle returned by the bare open() call).
    with open('resources/animals/animals.engine', 'rb') as f:
        engine = Engine.from_pickle(f)

    D = engine.dependence_probability_pairwise()
    pu.plot_clustermap(D)
Exemplo n.º 5
0
def test_two_views_column_partition_bernoulli__ci_(lovecat):
    """Smoke test: fit a binary dataset (categorical, k=2) and plot
    dependence/similarity clustermaps.

    Returns the fitted engine for interactive inspection.
    """
    dataset = retrieve_bernoulli_dataset()
    n_cols = len(dataset)

    engine = Engine(
        dataset.T,
        cctypes=['categorical'] * n_cols,
        distargs=[{
            'k': 2
        }] * n_cols,
        rng=gu.gen_rng(12),
        num_states=64)

    # Select the inference backend requested by the fixture.
    if lovecat:
        engine.transition_lovecat(N=200)
    else:
        engine.transition(N=200)

    dep_prob = engine.dependence_probability_pairwise()
    sim_block1 = engine.row_similarity_pairwise(cols=[0, 1])
    sim_block2 = engine.row_similarity_pairwise(cols=[2, 3])

    pu.plot_clustermap(dep_prob)
    pu.plot_clustermap(sim_block1)
    pu.plot_clustermap(sim_block2)

    # Expected block structure of the dependence matrix, kept here as a
    # visual reference for eyeballing the plotted clustermap.
    P_THEORY = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
Exemplo n.º 6
0
# Experiment configuration: table size, ensemble size, inference sweeps.
N_ROWS = 300
N_STATES = 12
N_ITERS = 100

# Column 0 is a unique-id column (categorical with one level per row);
# columns 1-8 are continuous.
cctypes = ['categorical(k={})'.format(N_ROWS)] + ['normal']*8
cctypes, distargs = cu.parse_distargs(cctypes)
column_names = ['id'] + ['one cluster']*4 + ['four cluster']*4

# id column.
X = np.zeros((N_ROWS, 9))
X[:,0] = np.arange(N_ROWS)

# Four columns of one cluster from the standard normal.
X[:,1:5] = np.random.randn(N_ROWS, 4)

# Four columns of four clusters with unit variance; each row's cluster label
# z \in {0,1,2,3} is shared across the four columns, giving means 4*z,
# i.e. \in {0,4,8,12}.
Z = np.random.randint(4, size=(N_ROWS))
X[:,5:] = 4*np.reshape(np.repeat(Z,4), (len(Z),4)) + np.random.randn(N_ROWS, 4)

# Inference.
engine = Engine(
    X, cctypes=cctypes, distargs=distargs, num_states=N_STATES)
engine.transition(N=N_ITERS)

# Dependence probability, rendered as a clustered heatmap; the four-cluster
# columns should group together, as should the one-cluster columns.
D = engine.dependence_probability_pairwise()
zmat = sns.clustermap(D, yticklabels=column_names, xticklabels=column_names)
plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
plt.show()