Example #1
 def _incorporate_new_timepoints(self, frame):
     """Incorporate fresh sample ids as new cgpm rows."""
     new_timepoints = frame.index[~frame.index.isin(self.dataset.index)]
     new_observations = frame[self.variables].loc[new_timepoints]
     self.dataset = self.dataset.append(new_observations)
     new_rows = [self._get_timepoint_row(t) for t in new_timepoints]
     if self.initialized:
         outputs = self.engine.states[0].outputs
         assert all(len(row) == len(outputs) for row in new_rows)
         rowids_cgpm = range(self.engine.states[0].n_rows(),
                             self.engine.states[0].n_rows() + len(new_rows))
         observations_cgpm = [{
             i: row[i]
             for i in outputs if not np.isnan(row[i])
         } for row in new_rows]
         assert all(
             rowid_cgpm == self._timepoint_to_rowid(timepoint)
             for timepoint, rowid_cgpm in zip(new_timepoints, rowids_cgpm))
         self.engine.incorporate_bulk(rowids_cgpm, observations_cgpm)
     # XXX Do not initialize here! Instead, consider including a dummy row of
     # all zeros or similar. The reason that we initialize with the full
     # training set is to ensure that we have a good initial set of
     # hyperparameter grids. Instead, we should consider redefining the grids
     # after incorporating new data (a slight heuristic).
     else:
         self.engine = Engine(
             np.asarray(new_rows),
             num_states=self.chains,
             cctypes=['normal'] * len(self.variables_lagged),
             Cd=self._get_variable_dependence_constraints(),
             rng=self.rng,
         )
         self.initialized = True
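Note: Example #2 below is the same method written incrementally, calling engine.incorporate once per new timepoint instead of batching the new rows into a single engine.incorporate_bulk call.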
Example #2
 def _incorporate_new_timepoints(self, frame):
     """Incorporate fresh sample ids as new cgpm rows."""
     new_timepoints = frame.index[~frame.index.isin(self.dataset.index)]
     new_observations = frame[self.variables].loc[new_timepoints]
     self.dataset = self.dataset.append(new_observations)
     new_rows = [self._get_timepoint_row(t) for t in new_timepoints]
     if self.initialized:
         outputs = self.engine.states[0].outputs
         for row, timepoint in zip(new_rows, new_timepoints):
             rowid_cgpm = self.engine.states[0].n_rows()
             assert len(row) == len(outputs)
             assert rowid_cgpm == self._timepoint_to_rowid(timepoint)
             row_cgpm = {i: row[i] for i in outputs if not np.isnan(row[i])}
             self.engine.incorporate(rowid_cgpm, row_cgpm)
     # XXX Do not initialize here! Instead, consider including a dummy row of
     # all zeros or something. The reason that we initialize with the full
     # training set is to ensure that we have a good initial set of
     # hyperparameter grids.
     else:
         self.engine = Engine(
             np.asarray(new_rows),
             num_states=self.chains,
             cctypes=['normal'] * len(self.variables_lagged),
             Cd=self._get_variable_dependence_constraints(),
             rng=self.rng,
         )
         self.initialized = True
Example #3
def test_engine_simulate_no_repeat():
    """Generate 3 samples from 2 states 10 times, and ensure uniqueness."""
    rng = gu.gen_rng(1)
    engine = Engine(X=[[1]], cctypes=['normal'], num_states=2, rng=rng)
    samples_list = [[
        sample[0] for sample in engine.simulate(None, [0], N=3)[0]
    ] for _i in xrange(10)]
    samples_set = set([frozenset(s) for s in samples_list])
    assert len(samples_set) == len(samples_list)
Example #4
def compare_dependence_heatmap():
    e1 = Engine.from_pickle('resources/animals/animals.engine')
    e2 = Engine.from_pickle('resources/animals/animals-lovecat.engine')

    D1 = e1.dependence_probability_pairwise()
    D2 = e2.dependence_probability_pairwise()
    C1 = pu.plot_clustermap(D1)

    ordering = C1.dendrogram_row.reordered_ind

    fig, ax = plt.subplots(nrows=1, ncols=2)
    pu.plot_heatmap(D1, xordering=ordering, yordering=ordering, ax=ax[0])
    pu.plot_heatmap(D2, xordering=ordering, yordering=ordering, ax=ax[1])
Example #5
def test_two_views_column_partition_normal__ci_(lovecat):
    D = retrieve_normal_dataset()

    engine = Engine(D.T,
                    outputs=[5, 0, 1, 2, 3, 4],
                    cctypes=['normal'] * len(D),
                    rng=gu.gen_rng(12),
                    num_states=64)

    if lovecat:
        engine.transition_lovecat(N=200)
    else:
        engine.transition(N=200)

    P = engine.dependence_probability_pairwise()
    R1 = engine.row_similarity_pairwise(cols=[5, 0, 1])
    R2 = engine.row_similarity_pairwise(cols=[2, 3, 4])

    pu.plot_clustermap(P)
    pu.plot_clustermap(R1)
    pu.plot_clustermap(R2)

    P_THEORY = [
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ]
    return engine
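P_THEORY above encodes the expected two-view block structure of the dependence probabilities for the outputs in constructor order, [5, 0, 1, 2, 3, 4]. The test only plots; a numeric check in the same spirit would be a sketch like the following, where P is the per-state list returned by dependence_probability_pairwise and the tolerance is an arbitrary choice:

assert np.allclose(np.mean(P, axis=0), P_THEORY, atol=.1)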
Example #6
def test_logpdf_score_crash():
    rng = gen_rng(8)
    # T = rng.choice([0,1], p=[.3,.7], size=250).reshape(-1,1)
    T = rng.normal(size=30).reshape(-1, 1)
    engine = Engine(T, cctypes=['normal'], rng=rng, num_states=4)
    logpdf_likelihood_initial = np.array(engine.logpdf_likelihood())
    logpdf_score_initial = np.array(engine.logpdf_score())
    assert np.all(logpdf_score_initial < logpdf_likelihood_initial)
    # assert np.all(logpdf_likelihood_initial < logpdf_score_initial)
    engine.transition(N=100)
    engine.transition(kernels=['column_hypers', 'view_alphas'], N=10)
    logpdf_likelihood_final = np.asarray(engine.logpdf_likelihood())
    logpdf_score_final = np.asarray(engine.logpdf_score())
    assert np.all(logpdf_score_final < logpdf_likelihood_final)
    assert np.max(logpdf_score_initial) < np.max(logpdf_score_final)
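A note on the asserts above: logpdf_score appears to be the joint log density of the data and the latent structure, while logpdf_likelihood conditions on that structure, so the negative log prior of the latents keeps the score strictly below the likelihood, and inference (transition) should raise the score. This reading is inferred from the test itself rather than stated in cgpm.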
Example #7
    def _engine(self, bdb, generator_id):
        # Probe the cache.
        cache = self._cache(bdb)
        if cache is not None and generator_id in cache.engine:
            return cache.engine[generator_id]

        # Not cached.  Load the engine from the database.
        cursor = bdb.sql_execute(
            '''
            SELECT engine_json FROM bayesdb_cgpm_generator
                WHERE generator_id = ?
        ''', (generator_id, ))
        engine_json = cursor_value(cursor)
        if engine_json is None:
            generator = core.bayesdb_generator_name(bdb, generator_id)
            raise BQLError(
                bdb, 'No models initialized for generator: %r' % (generator, ))

        # Deserialize the engine.
        engine = Engine.from_metadata(json.loads(engine_json),
                                      rng=bdb.np_prng,
                                      multiprocess=self._ncpu)

        # Cache it, if we can.
        if cache is not None:
            cache.engine[generator_id] = engine
        return engine
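The loader above assumes engines serialize to JSON-compatible metadata. A minimal round-trip sketch using only calls that appear in these examples (to_metadata, from_metadata, and the multiprocess keyword), given any Engine instance named engine:

import json

from cgpm.crosscat.engine import Engine

blob = json.dumps(engine.to_metadata())    # the engine_json stored in the table above
engine2 = Engine.from_metadata(json.loads(blob), multiprocess=0)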
Example #8
def test_logpdf_bulk__ci_(engine):
    engine = Engine.from_metadata(engine)
    rowid1, targets1, constraints1 = 5, {0: 0}, {2: 1, 3: .5}
    rowid2, targets2, constraints2 = -1, {1: 0, 4: .8}, {5: .5}
    # Bulk.
    rowids = [rowid1, rowid2]
    targets_list = [targets1, targets2]
    constraints_list = [constraints1, constraints2]

    def test_correct_dimensions(statenos):
        # Invoke
        logpdfs = engine.logpdf_bulk(rowids,
                                     targets_list,
                                     constraints_list=constraints_list,
                                     statenos=statenos)
        assert len(logpdfs) == (engine.num_states()
                                if statenos is None else len(statenos))
        for state_logpdfs in logpdfs:
            # state_logpdfs should be a list of floats, one per (rowid, targets) query.
            assert len(state_logpdfs) == len(rowids)
            for l in state_logpdfs:
                assert isinstance(l, float)

    test_correct_dimensions(statenos=None)
    test_correct_dimensions(statenos=[0, 1, 4, 5])
Example #9
def engine():
    # Set up the data generation
    cctypes, distargs = cu.parse_distargs([
        'normal',
        'poisson',
        'bernoulli',
        'categorical(k=4)',
        'lognormal',
        'exponential',
        'beta',
        'geometric',
        'vonmises',
    ])

    T, Zv, Zc = tu.gen_data_table(20, [1], [[.25, .25, .5]],
                                  cctypes,
                                  distargs, [.95] * len(cctypes),
                                  rng=gu.gen_rng(10))

    return Engine(T.T,
                  cctypes=cctypes,
                  distargs=distargs,
                  num_states=4,
                  rng=gu.gen_rng(312),
                  multiprocess=False)
Example #10
def test_simulate_bulk__ci_(engine):
    engine = Engine.from_metadata(engine)
    rowid1, targets1, constraints1, N1, = -1, [0, 2, 4, 5], {3: 1}, 7
    rowid2, targets2, constraints2, N2 = 5, [1, 3], {2: 1}, 3
    rowid3, targets3, constraints3, N3 = 8, [0], {4: .8}, 3
    # Bulk.
    rowids = [rowid1, rowid2, rowid3]
    targets_list = [targets1, targets2, targets3]
    constraints_list = [constraints1, constraints2, constraints3]
    Ns = [N1, N2, N3]

    def test_correct_dimensions(statenos):
        # Invoke
        samples = engine.simulate_bulk(rowids,
                                       targets_list,
                                       constraints_list=constraints_list,
                                       Ns=Ns,
                                       statenos=statenos)
        assert len(samples) == (engine.num_states()
                                if statenos is None else len(statenos))
        for states_samples in samples:
            assert len(states_samples) == len(rowids)
            for i, sample in enumerate(states_samples):
                assert len(sample) == Ns[i]
                for s in sample:
                    assert set(s.keys()) == set(targets_list[i])
                    assert len(s) == len(targets_list[i])

    test_correct_dimensions(None)
    test_correct_dimensions([4])
Example #11
def test_dependence_probability__ci_(engine):
    engine = Engine.from_metadata(engine)

    results = engine.dependence_probability(0, 2, statenos=None)
    assert len(results) == engine.num_states()

    results = engine.dependence_probability(0, 2, statenos=[1, 4])
    assert len(results) == 2
Example #12
 def _populate_from_metadata(model, metadata):
     model.initialized = metadata['initialized']
     model.dataset = pd.DataFrame(metadata['dataset.values'],
                                  index=metadata['dataset.index'],
                                  columns=metadata['dataset.columns'])
     model.engine = Engine.from_metadata(metadata['engine']) \
         if model.initialized else None
     return model
Example #13
def test_row_similarity__ci_(engine):
    engine = Engine.from_metadata(engine)

    results = engine.row_similarity(0, 2, statenos=None)
    assert len(results) == engine.num_states()

    results = engine.row_similarity(0, 2, statenos=[1, 4, 5])
    assert len(results) == 3
Example #14
def run_test(args):
    n_rows = args["num_rows"]
    n_iters = args["num_iters"]
    n_chains = args["num_chains"]

    n_per_chain = int(float(n_rows) / n_chains)

    fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(16, 9))
    axes = axes.ravel()
    k = 0
    for shape in shapes:
        print "Shape: %s" % shape
        T_o = np.asarray(gen_function[shape](n_rows))
        T_i = []

        engine = Engine(T_o.T,
                        cctypes=cctypes,
                        distargs=distargs,
                        num_states=n_chains)
        engine.transition(N=n_iters)

        for chain in xrange(n_chains):
            state = engine.get_state(chain)
            print "chain %i of %i" % (chain + 1, n_chains)
            T_i.extend(state.simulate(-1, [0, 1], N=n_per_chain))

        T_i = np.array(T_i)

        ax = axes[k]
        ax.scatter(T_o[0], T_o[1], color='blue', edgecolor='none')
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.set_title("%s original" % shape)

        ax = axes[k + 4]
        ax.scatter(T_i[:, 0], T_i[:, 1], color='red', edgecolor='none')
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        # Match the simulated panel's axes to the original-data panel above.
        ax.set_xlim(axes[k].get_xlim())
        ax.set_ylim(axes[k].get_ylim())
        ax.set_title("%s simulated" % shape)

        k += 1

    print "Done."
    return fig
Example #15
def get_engine():
    cctypes, distargs = cu.parse_distargs(
        ['normal', 'poisson', 'bernoulli', 'lognormal', 'beta', 'vonmises'])
    T, Zv, Zc = tu.gen_data_table(20, [1], [[.25, .25, .5]],
                                  cctypes,
                                  distargs, [.95] * len(cctypes),
                                  rng=gu.gen_rng(0))
    T = T.T
    # Make some nan cells for evidence.
    T[5, 0] = T[5, 1] = T[5, 2] = T[5, 3] = np.nan
    T[8, 4] = np.nan
    engine = Engine(T,
                    cctypes=cctypes,
                    distargs=distargs,
                    num_states=6,
                    rng=gu.gen_rng(0))
    engine.transition(N=2)
    return engine
Example #16
def render_states_to_disk(filepath, prefix):
    engine = Engine.from_pickle(filepath)
    for i in range(engine.num_states()):
        print '\r%d' % (i, )
        savefile = '%s-%d' % (prefix, i)
        state = engine.get_state(i)
        ru.viz_state(state,
                     row_names=animal_names,
                     col_names=animal_features,
                     savefile=savefile)
Example #17
def generate_gpmcc_posteriors(cctype, distargs, D_train, iters, seconds):
    """Learns gpmcc on D_train for seconds and simulates NUM_TEST times."""
    # Learning and posterior simulation.
    engine = Engine(D_train,
                    cctypes=[cctype],
                    distargs=[distargs],
                    num_states=64,
                    rng=gu.gen_rng(1))
    engine.transition(N=iters, S=seconds, progress=0)
    if iters:
        kernel = 'column_params' if cu.cctype_class(cctype).is_conditional()\
            else 'column_hypers'
        engine.transition(N=100, kernels=[kernel], progress=0)
    samples = engine.simulate(-1, [0], N=NUM_TEST)
    marginals = engine.logpdf_score()
    ranking = np.argsort(marginals)[::-1]
    for r in ranking[:5]:
        engine.get_state(r).plot()
    return [samples[i] for i in ranking[:5]]
Example #18
def test_engine_composition():
    from cgpm.crosscat.engine import Engine

    X = np.asarray([
        [1, 2, 0, 1],
        [1, 1, 0, 0],
    ])
    engine = Engine(X[:, [3]], outputs=[3], cctypes=['normal'], num_states=2)
    cgpm = VsCGpm(
        outputs=[0, 1],
        inputs=[3],
        source=source_abstract,
    )

    for i, row in enumerate(X):
        cgpm.incorporate(i, {0: row[0], 1: row[1]}, {3: row[3]})

    cgpm.transition(N=2)
    engine.compose_cgpm([cgpm, cgpm], multiprocess=True)
Example #19
def test_relevance_probability__ci_(engine):
    engine = Engine.from_metadata(engine)

    results = engine.relevance_probability(0, [2, 14], 0, statenos=None)
    assert len(results) == engine.num_states()

    results = engine.relevance_probability(0, [2, 14],
                                           0,
                                           statenos=range(engine.num_states()))
    assert len(results) == engine.num_states()
Example #20
def test_two_views_row_partition_bernoulli__ci_(lovecat):
    D = retrieve_bernoulli_dataset()

    if lovecat:
        engine = Engine(D.T,
                        cctypes=['categorical'] * len(D),
                        distargs=[{'k': 2}] * len(D),
                        Zv={0: 0, 1: 0, 2: 1, 3: 1},
                        rng=gu.gen_rng(12),
                        num_states=64)
        engine.transition_lovecat(N=100,
                                  kernels=[
                                      'row_partition_assignments',
                                      'row_partition_hyperparameters',
                                      'column_hyperparameters',
                                  ])
    else:
        engine = Engine(D.T,
                        cctypes=['bernoulli'] * len(D),
                        Zv={0: 0, 1: 0, 2: 1, 3: 1},
                        rng=gu.gen_rng(12),
                        num_states=64)
        engine.transition(N=100,
                          kernels=[
                              'view_alphas',
                              'rows',
                              'column_hypers',
                          ])

    R1 = engine.row_similarity_pairwise(cols=[0, 1])
    R2 = engine.row_similarity_pairwise(cols=[2, 3])

    pu.plot_clustermap(R1)
    pu.plot_clustermap(R2)
    return engine
Example #21
def get_engine():
    X = [[0.123, 1, 0], [1.12, 0, 1], [1.1, 1, 2]]
    rng = gu.gen_rng(1)
    return Engine(X,
                  outputs=[8, 7, 9],
                  num_states=4,
                  cctypes=['normal', 'bernoulli', 'categorical'],
                  distargs=[None, None, {'k': 3}],
                  rng=rng)
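A usage sketch for this fixture, combining calls shown in the other examples; the number of transitions, target column, and sample count are arbitrary choices:

engine = get_engine()
engine.transition(N=10)
samples = engine.simulate(None, [8], N=5)   # one list of 5 sample dicts per state
assert len(samples) == engine.num_states()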
Example #22
def test_dependence_probability_pairwise():
    cctypes, distargs = cu.parse_distargs(['normal', 'normal', 'normal'])

    T, Zv, _Zc = tu.gen_data_table(10, [.5, .5], [[.25, .25, .5], [.3, .7]],
                                   cctypes,
                                   distargs, [.95] * len(cctypes),
                                   rng=gu.gen_rng(100))

    outputs = [0, 1, 2]
    engine = Engine(T.T,
                    outputs=outputs,
                    cctypes=cctypes,
                    num_states=4,
                    distargs=distargs,
                    Zv={o: z for o, z in zip(outputs, Zv)},
                    rng=gu.gen_rng(0))

    Ds = engine.dependence_probability_pairwise(multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (len(outputs), len(outputs)) for D in Ds)
    for D in Ds:
        for col0, col1 in itertools.product(outputs, outputs):
            i0 = outputs.index(col0)
            i1 = outputs.index(col1)
            actual = D[i0, i1]
            expected = Zv[i0] == Zv[i1]
            assert actual == expected

    Ds = engine.dependence_probability_pairwise(colnos=[0, 2], multiprocess=0)
    assert len(Ds) == engine.num_states()
    assert all(np.shape(D) == (2, 2) for D in Ds)
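The exact equality assertion above works because each state's pairwise dependence probability is the 0/1 indicator of two columns sharing a view, and fixing Zv in the constructor makes that view assignment known in advance, so comparing against Zv directly is valid.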
Example #23
def launch_analysis():
    engine = Engine(animals.values.astype(float),
                    num_states=64,
                    cctypes=['categorical'] * len(animals.values[0]),
                    distargs=[{'k': 2}] * len(animals.values[0]),
                    rng=gu.gen_rng(7))

    engine.transition(N=900)
    with open('resources/animals/animals.engine', 'wb') as f:
        engine.to_pickle(f)

    engine = Engine.from_pickle(open('resources/animals/animals.engine', 'rb'))
    D = engine.dependence_probability_pairwise()
    pu.plot_clustermap(D)
Example #24
def test_incorporate_engine():
    engine = Engine(
        T[:,:2],
        cctypes=CCTYPES[:2],
        distargs=DISTARGS[:2],
        num_states=4,
        rng=gu.gen_rng(0),
    )
    engine.transition(N=5)

    # Incorporate a new dim with a non-contiguous output.
    engine.incorporate_dim(
        T[:,2],
        outputs=[10],
        cctype=CCTYPES[2],
        distargs=DISTARGS[2]
    )
    engine.transition(N=2)

    # Serialize the engine, and run a targeted transition on variable 10.
    m = engine.to_metadata()
    engine2 = Engine.from_metadata(m)
    engine2.transition(N=2, cols=[10], multiprocess=0)
    assert all(s.outputs == [0,1,10] for s in engine.states)
Example #25
 def from_metadata(metadata, seed):
     model = TRCRP_Mixture(
         chains=metadata['chains'],
         lag=metadata['lag'],
         variables=metadata['variables'],
         rng=np.random.RandomState(seed),
     )
     # Internal fields.
     model.initialized = metadata['initialized']
     model.dataset = pd.DataFrame(metadata['dataset.values'],
                                  index=metadata['dataset.index'],
                                  columns=metadata['dataset.columns'])
     model.engine = Engine.from_metadata(metadata['engine']) \
         if model.initialized else None
     return model
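A sketch of the inverse serializer, assuming the constructor stores its arguments as attributes of the same names (hypothetical; the real TRCRP_Mixture may implement this differently):

 def to_metadata(model):
     # Hypothetical inverse of from_metadata, mirroring its field layout.
     return {
         'chains': model.chains,
         'lag': model.lag,
         'variables': model.variables,
         'initialized': model.initialized,
         'dataset.values': model.dataset.values.tolist(),
         'dataset.index': list(model.dataset.index),
         'dataset.columns': list(model.dataset.columns),
         'engine': model.engine.to_metadata() if model.initialized else None,
     }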
Example #26
def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    D, Zv, Zc = tu.gen_data_table(n_rows=150,
                                  view_weights=None,
                                  cluster_weights=[
                                      [.2, .2, .2, .4],
                                      [.3, .2, .5],
                                  ],
                                  cctypes=['normal'] * 6,
                                  distargs=[None] * 6,
                                  separation=[0.95] * 6,
                                  view_partition=[0, 0, 0, 1, 1, 1],
                                  rng=gu.gen_rng(12))

    engine = Engine(
        D.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=100)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    dependence_probability = numpy.mean(
        engine.dependence_probability_pairwise(), axis=0)

    assert dependence_probability[0, 1] > 0.8
    assert dependence_probability[1, 2] > 0.8
    assert dependence_probability[0, 2] > 0.8

    assert dependence_probability[3, 4] > 0.8
    assert dependence_probability[4, 5] > 0.8
    assert dependence_probability[3, 5] > 0.8

    assert dependence_probability[0, 3] < 0.2
    assert dependence_probability[0, 4] < 0.2
    assert dependence_probability[0, 5] < 0.2

    assert dependence_probability[1, 3] < 0.2
    assert dependence_probability[1, 4] < 0.2
    assert dependence_probability[1, 5] < 0.2

    assert dependence_probability[2, 3] < 0.2
    assert dependence_probability[2, 4] < 0.2
    assert dependence_probability[2, 5] < 0.2
Example #27
def test_entropy_bernoulli_bivariate__ci_():
    rng = gen_rng(10)

    # Generate a bivariate Bernoulli dataset.
    PX = [.3, .7]
    PY = [[.2, .8], [.6, .4]]
    TX = rng.choice([0, 1], p=PX, size=250)
    TY = np.zeros(shape=len(TX))
    TY[TX == 0] = rng.choice([0, 1], p=PY[0], size=len(TX[TX == 0]))
    TY[TX == 1] = rng.choice([0, 1], p=PY[1], size=len(TX[TX == 1]))
    T = np.column_stack((TY, TX))

    engine = Engine(
        T,
        cctypes=['categorical', 'categorical'],
        distargs=[{'k': 2}, {'k': 2}],
        num_states=64,
        rng=rng,
    )

    engine.transition_lovecat(N=200)

    # exact computation
    entropy_exact = (-PX[0] * PY[0][0] * np.log(PX[0] * PY[0][0]) -
                     PX[0] * PY[0][1] * np.log(PX[0] * PY[0][1]) -
                     PX[1] * PY[1][0] * np.log(PX[1] * PY[1][0]) -
                     PX[1] * PY[1][1] * np.log(PX[1] * PY[1][1]))

    # logpdf computation
    logps = engine.logpdf_bulk(
        [-1, -1, -1, -1],
        [{0: 0, 1: 0}, {0: 0, 1: 1}, {0: 1, 1: 0}, {0: 1, 1: 1}])
    entropy_logpdf = [-np.sum(np.exp(logp) * logp) for logp in logps]

    # mutual_information computation.
    entropy_mi = engine.mutual_information([0, 1], [0, 1], N=1000)

    # Punt CLT analysis and go for a small tolerance.
    assert np.allclose(entropy_exact, entropy_logpdf, atol=.15)
    assert np.allclose(entropy_exact, entropy_mi, atol=.15)
    assert np.allclose(entropy_logpdf, entropy_mi, atol=.1)
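For reference, the exact joint entropy defined above can be computed directly from PX and PY; it comes to about 1.2321 nats (a standalone check, independent of the engine):

import numpy as np

PX = [.3, .7]
PY = [[.2, .8], [.6, .4]]
joint = np.array([[PX[i] * PY[i][j] for j in (0, 1)] for i in (0, 1)])
print(-np.sum(joint * np.log(joint)))    # ~1.2321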
Example #28
def test_multiple_stattypes():
    '''Test cgpm statistical types are heuristically converted to Loom types.'''
    cctypes, distargs = cu.parse_distargs([
        'normal', 'poisson', 'bernoulli', 'categorical(k=4)', 'lognormal',
        'exponential', 'beta', 'geometric', 'vonmises'
    ])

    T, Zv, Zc = tu.gen_data_table(200, [1], [[.25, .25, .5]],
                                  cctypes,
                                  distargs, [.95] * len(cctypes),
                                  rng=gu.gen_rng(10))

    engine = Engine(
        T.T,
        cctypes=cctypes,
        distargs=distargs,
        rng=gu.gen_rng(15),
        num_states=16,
    )

    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=5)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    # Check serialization.
    metadata = engine.to_metadata()
    modname = importlib.import_module(metadata['factory'][0])
    builder = getattr(modname, metadata['factory'][1])
    engine2 = builder.from_metadata(metadata)

    # To JSON.
    json_metadata = json.dumps(engine.to_metadata())
    engine3 = builder.from_metadata(json.loads(json_metadata))

    # Assert all states in engine, engine2, and engine3 have same loom_path.
    loom_paths = list(
        itertools.chain.from_iterable([s._loom_path for s in e.states]
                                      for e in [engine, engine2, engine3]))
    assert all(p == loom_paths[0] for p in loom_paths)

    engine2.transition(S=5)
    dependence_probability = engine2.dependence_probability_pairwise()

    assert numpy.all(dependence_probability > 0.85)
Example #29
def test_errors():
    """Targets loomcat._validate_transition."""
    D, Zv, Zc = tu.gen_data_table(n_rows=150,
                                  view_weights=None,
                                  cluster_weights=[
                                      [.2, .2, .2, .4],
                                      [.3, .2, .5],
                                  ],
                                  cctypes=['normal'] * 6,
                                  distargs=[None] * 6,
                                  separation=[0.95] * 6,
                                  view_partition=[0, 0, 0, 1, 1, 1],
                                  rng=gu.gen_rng(12))

    state = State(
        D.T,
        outputs=range(10, 16),
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
    )

    engine = Engine(
        D.T,
        outputs=range(10, 16),
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
    )

    def check_errors(cgpm):
        with pytest.raises(ValueError):
            cgpm.transition_loom(N=10, S=5)
        with pytest.raises(ValueError):
            cgpm.transition_loom(N=10, kernels=['alpha'])
        with pytest.raises(ValueError):
            cgpm.transition_loom(N=10, progress=True)
        with pytest.raises(ValueError):
            cgpm.transition_loom(N=10, checkpoint=2)
        cgpm.transition_loom(N=2)

    check_errors(state)
    check_errors(engine)
Example #30
def gen_simple_engine(multiprocess=1):
    data = np.array([[1, 1, 1]])
    R = len(data)
    D = len(data[0])
    outputs = range(D)
    engine = Engine(
        X=data,
        num_states=20,
        rng=gu.gen_rng(1),
        multiprocess=multiprocess,
        outputs=outputs,
        alpha=1.,
        cctypes=['bernoulli']*D,
        distargs={i: {'alpha': 1., 'beta': 1.} for i in outputs},
        Zv={0: 0, 1: 0, 2: 1},
        view_alphas=[1.]*D,
        Zrv={0: [0]*R, 1: [0]*R})
    return engine
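A usage sketch for the fixture above, using the simulate call shown in Example #3; the targets and sample count are arbitrary:

engine = gen_simple_engine(multiprocess=0)
samples = engine.simulate(None, [0, 1], N=4)    # one list of 4 sample dicts per state
assert len(samples) == 20                       # num_states in the fixture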