Example #1
def test_runner_multiprocessing_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_iter():
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            key = tuple(tuple(permutation_canonical(latent.assignments(i)))
                        for i in xrange(len(domains)))
            yield idmap[key]

    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #2
def test_runner_multiprocessing_convergence():
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, ['assign'])
               for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=1000)  # burnin
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_iter():
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            yield idmap[tuple(permutation_canonical(latent.assignments()))]

    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #3
def test_runner_multiprocessing():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = map(numpy_dataview, toy_dataset(defn))
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
Example #4
def test_runner_multyvac():
    defn = model_definition(10, [bb, nich, niw(3)])
    Y = toy_dataset(defn)
    view = numpy_dataview(Y)
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(2)]
    runners = [runner.runner(defn, view, latent, kc) for latent in latents]
    # run the chains on the multyvac cloud backend rather than local
    # multiprocessing; `layer` and `core` are multyvac job settings
    r = parallel.runner(runners, backend='multyvac', layer='perf', core='f2')
    r.run(r=prng, niters=1000)
    r.run(r=prng, niters=1000)
Example #5
def test_runner_multiprocessing():
    defn = model_definition(10, [bb, nich, niw(3)])
    Y = toy_dataset(defn)
    view = numpy_dataview(Y)
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
Example #6
def test_runner_multiprocessing():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = map(numpy_dataview, toy_dataset(defn))
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [
        model.initialize(defn, views, prng) for _ in xrange(mp.cpu_count())
    ]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
Example #7
def run_dpgmm(niter=1000, datadir="../../", nfeatures=13):

    ranking = [10,  6,  7, 26,  5,  8,  4, 19, 12, 23, 24, 33, 28, 25,
               14,  3,  0, 1, 21, 30, 11, 31, 13,  9, 22,  2, 27, 29,
               32, 17, 18, 20, 16, 15]

    (features, labels, lc, hr, tstart,
     features_lb, labels_lb, lc_lb, hr_lb,
     fscaled, fscaled_lb, fscaled_full, labels_all) = load_data(
        datadir, tseg=1024.0, log_features=None, ranking=ranking)

    labels_phys = feature_engineering.convert_labels_to_physical(labels)
    labels_phys_lb = feature_engineering.convert_labels_to_physical(labels_lb)

    labels_all_phys = np.hstack([labels_phys["train"], labels_phys["val"],
                                 labels_phys["test"]])


    # keep only the first `nfeatures` columns of the scaled feature matrix
    fscaled_small = fscaled_full[:, :nfeatures]

    nchains = 8

    # The random state object
    prng = rng()

    # Define a DP-GMM whose Gaussian component has one dimension per feature
    defn = model_definition(fscaled_small.shape[0],
                            [normal_inverse_wishart(fscaled_small.shape[1])])

    # pack the features into a one-field numpy record array, which is the
    # structured format that the mixture-model numpy_dataview expects
    fscaled_rec = np.array([(list(f),) for f in fscaled_small],
                           dtype=[('', np.float32, fscaled_small.shape[1])])

    # Create a wrapper around the numpy recarray which
    # data-microscopes understands
    view = numpy_dataview(fscaled_rec)

    # Initialize nchains start points randomly in the state space
    latents = [model.initialize(defn, view, prng) for _ in xrange(nchains)]

    # Create a runner for each chain
    runners = [runner.runner(defn, view, latent,
                             kernel_config=['assign']) for latent in latents]
    r = parallel.runner(runners)

    r.run(r=prng, niters=niter)

    with open(datadir+"grs1915_dpgmm.pkl", "w") as f:
        pickle.dump(r, f)

    return
Example #8
def test_runner_multiprocessing_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latents = [
        model.initialize(defn, views, prng) for _ in xrange(mp.cpu_count())
    ]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_iter():
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            key = tuple(
                tuple(permutation_canonical(latent.assignments(i)))
                for i in xrange(len(domains)))
            yield idmap[key]

    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #9
defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()


# ##Next, let's initialize the model and define the runners.  
# 
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

nchains = cpu_count()
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha':1e-3}]) for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
runners = [runner.runner(defn, views, latent, kc) for latent in latents]
r = parallel.runner(runners)


# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

start = time.time()
r.run(r=prng, niters=1000)
print "inference took {} seconds".format(time.time() - start)


# ##Now that we have learned our model let's get our cluster assignments

# In[ ]:
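
# A minimal sketch of that step (mirroring Example #11 below); it assumes
# `groups` has been imported from microscopes.common.query:
infers = r.get_latents()
clusters = groups(infers[0].assignments(0), sort=True)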
Example #10
def infinite_relational_model(corr_matrix, lag_matrix, threshold, sampled_coords, window_size):
    import numpy as np
    import math
    import json
    import time
    import itertools as it
    from multiprocessing import cpu_count
    from microscopes.common.rng import rng
    from microscopes.common.relation.dataview import numpy_dataview
    from microscopes.models import bb as beta_bernoulli
    from microscopes.irm.definition import model_definition
    from microscopes.irm import model, runner, query
    from microscopes.kernels import parallel
    from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

    cluster_matrix = []
    graph = []

    # build a boolean adjacency matrix: an edge exists wherever the
    # correlation reaches the threshold
    for row in corr_matrix:
        graph_row = []
        for corr in row:
            if corr < threshold:
                graph_row.append(False)
            else:
                graph_row.append(True)

        graph.append(graph_row)

    graph = np.array(graph, dtype=bool)

    graph_size = len(graph)

    # conduct Infinite Relational Model
    defn = model_definition([graph_size], [((0, 0), beta_bernoulli)])
    views = [numpy_dataview(graph)]
    prng = rng()

    nchains = cpu_count()
    latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha':1e-3}]) for _ in xrange(nchains)]
    kc = runner.default_assign_kernel_config(defn)
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)

    start = time.time()
    # r.run(r=prng, niters=1000)
    # r.run(r=prng, niters=100)
    r.run(r=prng, niters=20)
    print ("inference took", time.time() - start, "seconds")

    infers = r.get_latents()
    clusters = groups(infers[0].assignments(0), sort=True)
    ordering = list(it.chain.from_iterable(clusters))

    # reorder the rows and columns of each matrix so that members of the
    # same cluster sit next to each other
    z = graph.copy()
    z = z[ordering]
    z = z[:, ordering]

    corr_matrix = corr_matrix[ordering]
    corr_matrix = corr_matrix[:, ordering]

    lag_matrix = lag_matrix[ordering]
    lag_matrix = lag_matrix[:, ordering]

    cluster_sampled_coords = np.array(sampled_coords)
    cluster_sampled_coords = cluster_sampled_coords[ordering]

    response_msg = {
        'corrMatrix': corr_matrix.tolist(),
        'lagMatrix': lag_matrix.tolist(),
        'clusterMatrix': z.tolist(),
        'clusterSampledCoords': cluster_sampled_coords.tolist(),
        'nClusterList': [len(cluster) for cluster in clusters],
        'ordering': ordering,
    }
    f = open("./expdata/clustermatrix-" + str(window_size) + ".json", "w")
    json.dump(response_msg, f)
    f.close()

    return response_msg
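
# A hypothetical invocation of the function above; the inputs are stand-ins
# (random correlations, zero lags, dummy coordinates) rather than real data,
# and the call writes its JSON output under ./expdata/.
import numpy as np

n = 20
corr = np.corrcoef(np.random.rand(n, 100))          # stand-in correlation matrix
lags = np.zeros((n, n))                             # stand-in lag matrix
coords = [[float(i), float(i)] for i in range(n)]   # stand-in sampled coordinates
result = infinite_relational_model(corr, lags, threshold=0.5,
                                   sampled_coords=coords, window_size=256)
print result['nClusterList']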
Example #11
# ##Next, let's initialize the model and define the runners.
#
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

nchains = cpu_count()
latents = [
    model.initialize(defn, views, r=prng, cluster_hps=[{
        'alpha': 1e-3
    }]) for _ in xrange(nchains)
]
kc = runner.default_assign_kernel_config(defn)
runners = [runner.runner(defn, views, latent, kc) for latent in latents]
r = parallel.runner(runners)

# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

start = time.time()
r.run(r=prng, niters=1000)
print "inference took {} seconds".format(time.time() - start)

# ##Now that we have learned our model let's get our cluster assignments

# In[ ]:

infers = r.get_latents()
clusters = groups(infers[0].assignments(0), sort=True)