Example #1
def test_slice_theta_mm():
    N = 100
    # N Bernoulli(0.8) draws as a one-field structured array
    data = np.array([(np.random.random() < 0.8,) for _ in xrange(N)],
                    dtype=[('', bool)])
    defn = model_definition(N, [bbnc])
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}
    view = numpy_dataview(data)
    s = initialize(defn,
                   view,
                   cluster_hp={'alpha': 1., 'beta': 9.},
                   feature_hps=[prior],
                   r=r,
                   assignment=[0] * N)

    heads = len([1 for y in data if y[0]])
    tails = N - heads

    # conjugate Beta posterior: Beta(alpha + heads, beta + tails)
    alpha1 = prior['alpha'] + heads
    beta1 = prior['beta'] + tails

    bs = bind(s, view)
    params = {0: {'p': 0.05}}

    def sample_fn():
        theta(bs, r, tparams=params)
        return s.get_suffstats(0, 0)['p']

    rv = beta(alpha1, beta1)
    assert_1d_cont_dist_approx_sps(sample_fn, rv, nsamples=50000)
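
For reference, the closed-form target above comes from Beta-Bernoulli
conjugacy: with a Beta(alpha, beta) prior on p and `heads` successes in N
draws, the posterior is Beta(alpha + heads, beta + tails). A minimal
standalone check of that identity (a sketch assuming only numpy and
scipy.stats; not part of the original test):

import numpy as np
from scipy.stats import beta as beta_dist

np.random.seed(0)
draws = np.random.random(100) < 0.8         # Bernoulli(0.8) coin flips
heads, tails = draws.sum(), (~draws).sum()

# posterior under a Beta(1, 9) prior
posterior = beta_dist(1.0 + heads, 9.0 + tails)
print posterior.mean()                      # close to heads / 100.0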
Example #2

def _test_convergence_bb_cxx(N,
                             D,
                             kernel,
                             preprocess_data_fn=None,
                             nonconj=False,
                             burnin_niters=10000,
                             skip=10,
                             ntries=50,
                             nsamples=1000,
                             kl_places=2):
    r = rng()
    cluster_hp = {'alpha': 2.0}
    feature_hps = [{'alpha': 1.0, 'beta': 1.0}] * D
    defn = model_definition(N, [bb] * D)
    nonconj_defn = model_definition(N, [bbnc] * D)
    Y, posterior = data_with_posterior(defn, cluster_hp, feature_hps,
                                       preprocess_data_fn)
    data = numpy_dataview(Y)
    s = initialize(nonconj_defn if nonconj else defn,
                   data,
                   cluster_hp=cluster_hp,
                   feature_hps=feature_hps,
                   r=r)
    bs = bind(s, data)
    wrapped_kernel = lambda s: kernel(s, r)
    _test_convergence(bs, posterior, wrapped_kernel, burnin_niters, skip,
                      ntries, nsamples, kl_places)
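
This helper is parameterized by the kernel under test. A plausible driver
(hypothetical; it assumes the `assign` Gibbs kernel imported as in the MNIST
examples below, and keeps N and D small so the exact posterior stays
enumerable):

def test_convergence_bb_gibbs():
    # kernel receives the bound state and the rng, matching
    # wrapped_kernel = lambda s: kernel(s, r) above
    _test_convergence_bb_cxx(N=4, D=5, kernel=assign)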
Example #3
def test_get_set_params():
    defn = model_definition(1, [bb, bnb, gp, nich])
    data = np.array([(True, 3, 5, 10.)],
                    dtype=[('', bool), ('', int), ('', int), ('', float)])
    s = initialize(defn=defn, data=numpy_dataview(data), r=rng())
    s.set_cluster_hp({'alpha': 3.0})
    assert_dict_almost_equals(s.get_cluster_hp(), {'alpha': 3.0})
    hyperparams = [
        {'alpha': 1.2, 'beta': 4.3},
        {'alpha': 1., 'beta': 1., 'r': 1},
        {'alpha': 1., 'inv_beta': 1.},
        {'mu': 30., 'kappa': 1., 'sigmasq': 1., 'nu': 1.},
    ]
    for i, hp in enumerate(hyperparams):
        s.set_feature_hp(i, hp)
        assert_dict_almost_equals(s.get_feature_hp(i), hp)
Example #4
def test_runner_multiprocessing_convergence():
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, ['assign'])
               for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=1000)  # burnin
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_iter():
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            yield idmap[tuple(permutation_canonical(latent.assignments()))]

    # hold the sample iterator in a mutable cell; when a batch is
    # exhausted, reset it and recurse once to start a fresh batch
    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #7
def crp_score(assignment):
    latent = initialize(defn,
                        view,
                        r=r,
                        cluster_hp={'alpha': alpha},
                        assignment=assignment)
    return latent.score_assignment()
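
This fragment closes over `defn`, `view`, `r`, and `alpha` from its
enclosing test. A sketch of the kind of context it assumes (hypothetical
names, mirroring the other examples here and their imports):

N, alpha = 4, 2.0
defn = model_definition(N, [bb])
Y = toy_dataset(defn)
view = numpy_dataview(Y)
r = rng()

# permutation_iter(N) enumerates the canonical partitions of N items;
# if score_assignment() returns the CRP log prior, the scores of all
# partitions should normalize to ~1
scores = np.array([crp_score(list(a)) for a in permutation_iter(N)])
print np.exp(scores).sum()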
Example #8
def test_posterior_predictive_statistic():
    N, D = 10, 4  # D needs to be even
    defn = model_definition(N, [bb] * D)
    Y = toy_dataset(defn)
    prng = rng()
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng) for _ in xrange(10)]
    q = ma.masked_array(
        np.array([(False,) * D], dtype=[('', bool)] * D),
        mask=[(False,) * (D / 2) + (True,) * (D / 2)])

    statistic = query.posterior_predictive_statistic(q, latents, prng)
    assert_equals(statistic.shape, (1,))
    assert_equals(len(statistic.dtype), D)

    statistic = query.posterior_predictive_statistic(
        q, latents, prng, merge='mode')
    assert_equals(statistic.shape, (1,))
    assert_equals(len(statistic.dtype), D)

    statistic = query.posterior_predictive_statistic(
        q, latents, prng, merge=['mode', 'mode', 'avg', 'avg'])
    assert_equals(statistic.shape, (1,))
    assert_equals(len(statistic.dtype), D)

    q = ma.masked_array(
        np.array([(False,) * D] * 3, dtype=[('', bool)] * D),
        mask=[(False,) * (D / 2) + (True,) * (D / 2)] * 3)
    statistic = query.posterior_predictive_statistic(q, latents, prng)
    assert_equals(statistic.shape, (3,))
    assert_equals(len(statistic.dtype), D)
Example #9
def score_fn(assignment):
    s = initialize(defn,
                   data,
                   r,
                   cluster_hp=cluster_hp,
                   feature_hps=feature_hps,
                   assignment=assignment)
    return s.score_joint(r)
Example #10
def test_zmatrix():
    N, D = 10, 4
    defn = model_definition(N, [bb] * D)
    Y = toy_dataset(defn)
    prng = rng()
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng) for _ in xrange(10)]
    zmat = query.zmatrix(latents)
    assert_equals(zmat.shape, (N, N))
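
For intuition, the z-matrix summarizes co-clustering across the latent
samples: entry (i, j) can be read as the fraction of latents that place
entities i and j in the same cluster. A naive version of that computation
(an assumption about the semantics, not the library's implementation):

def naive_zmatrix(latents, N):
    # average the co-assignment indicator over latent states
    z = np.zeros((N, N))
    for latent in latents:
        a = latent.assignments()
        for i in xrange(N):
            for j in xrange(N):
                z[i, j] += float(a[i] == a[j])
    return z / len(latents)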
Example #11
def test_runner_multyvac():
    defn = model_definition(10, [bb, nich, niw(3)])
    Y = toy_dataset(defn)
    view = numpy_dataview(Y)
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(2)]
    runners = [runner.runner(defn, view, latent, kc) for latent in latents]
    r = parallel.runner(runners, backend='multyvac', layer='perf', core='f2')
    # run twice to check the multyvac backend is restartable
    r.run(r=prng, niters=1000)
    r.run(r=prng, niters=1000)
Example #12
def _test_scalar_hp_inference(view,
                              prior_fn,
                              w,
                              grid_min,
                              grid_max,
                              grid_n,
                              likelihood_model,
                              scalar_hp_key,
                              burnin=1000,
                              nsamples=1000,
                              every=10,
                              trials=100,
                              places=2):
    """
    view must be 1D
    """
    r = rng()

    hparams = {0: {scalar_hp_key: (prior_fn, w)}}

    # NOTE: score_fn closes over `latent`, which is bound below; it is only
    # called after initialization, so the forward reference is safe
    def score_fn(scalar):
        d = latent.get_feature_hp(0)
        prev_scalar = d[scalar_hp_key]
        d[scalar_hp_key] = scalar
        latent.set_feature_hp(0, d)
        score = prior_fn(scalar) + latent.score_data(0, None, r)
        d[scalar_hp_key] = prev_scalar
        latent.set_feature_hp(0, d)
        return score

    defn = model_definition(len(view), [likelihood_model])
    latent = initialize(defn, view, r=r)
    model = bind(latent, view)

    def sample_fn():
        for _ in xrange(every):
            slice_hp(model, r, hparams=hparams)
        return latent.get_feature_hp(0)[scalar_hp_key]

    for _ in xrange(burnin):
        slice_hp(model, r, hparams=hparams)
    print 'finished burnin of', burnin, 'iterations'

    print 'grid_min', grid_min, 'grid_max', grid_max
    assert_1d_cont_dist_approx_emp(sample_fn,
                                   score_fn,
                                   grid_min,
                                   grid_max,
                                   grid_n,
                                   trials,
                                   nsamples,
                                   places)
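
A hypothetical driver for this helper, reusing the `log_exponential` prior
that appears in the MNIST examples below to test slice sampling of a
beta-bernoulli 'alpha' hyperparameter (the test name and grid values are
illustrative, not from the original suite):

def test_bb_alpha_inference():
    Y = np.array([(True,)] * 50 + [(False,)] * 50, dtype=[('', bool)])
    _test_scalar_hp_inference(numpy_dataview(Y),
                              prior_fn=log_exponential(1.),
                              w=0.1,
                              grid_min=0.01,
                              grid_max=5.0,
                              grid_n=100,
                              likelihood_model=bb,
                              scalar_hp_key='alpha')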
Example #13
def test_runner_multiprocessing():
    defn = model_definition(10, [bb, nich, niw(3)])
    Y = toy_dataset(defn)
    view = numpy_dataview(Y)
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
Example #14
def run_dpgmm(niter=1000, datadir="../../", nfeatures=13):

    ranking = [10,  6,  7, 26,  5,  8,  4, 19, 12, 23, 24, 33, 28, 25,
               14,  3,  0, 1, 21, 30, 11, 31, 13,  9, 22,  2, 27, 29,
               32, 17, 18, 20, 16, 15]

    (features, labels, lc, hr, tstart,
     features_lb, labels_lb, lc_lb, hr_lb,
     fscaled, fscaled_lb, fscaled_full,
     labels_all) = load_data(datadir, tseg=1024.0, log_features=None,
                             ranking=ranking)

    labels_phys = feature_engineering.convert_labels_to_physical(labels)
    labels_phys_lb = feature_engineering.convert_labels_to_physical(labels_lb)

    labels_all_phys = np.hstack([labels_phys["train"], labels_phys["val"],
                                 labels_phys["test"]])

    # keep only the top-ranked nfeatures features
    fscaled_small = fscaled_full[:, :nfeatures]

    nchains = 8

    # The random state object
    prng = rng()

    # Define a DP-GMM whose normal-inverse-Wishart component matches
    # the dimensionality of the feature matrix
    defn = model_definition(fscaled_small.shape[0],
                            [normal_inverse_wishart(fscaled_small.shape[1])])

    fscaled_rec = np.array([(list(f),) for f in fscaled_small],
                           dtype=[('', np.float32, fscaled_small.shape[1])])

    # Create a wrapper around the numpy recarray which
    # data-microscopes understands
    view = numpy_dataview(fscaled_rec)

    # Initialize nchains start points randomly in the state space
    latents = [model.initialize(defn, view, prng) for _ in xrange(nchains)]

    # Create a runner for each chain
    runners = [runner.runner(defn, view, latent,
                             kernel_config=['assign']) for latent in latents]
    r = parallel.runner(runners)

    r.run(r=prng, niters=niter)

    with open(datadir + "grs1915_dpgmm.pkl", "wb") as f:
        pickle.dump(r, f)

    return
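
Since the parallel runner itself is pickled, results can be pulled back out
later. A sketch of reloading (assuming the pickle round-trips in the same
environment; `get_latents` is used the same way in the convergence tests
above):

with open(datadir + "grs1915_dpgmm.pkl", "rb") as f:
    r = pickle.load(f)

# one cluster-assignment vector per chain
assignments = [latent.assignments() for latent in r.get_latents()]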
Example #17
def test_runner_convergence():
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latent = model.initialize(defn, view, prng)
    r = runner.runner(defn, view, latent, ['assign'])
    r.run(r=prng, niters=1000)  # burnin
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        r.run(r=prng, niters=10)
        new_latent = r.get_latent()
        return idmap[tuple(permutation_canonical(new_latent.assignments()))]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
Example #18
def latent(groups, entities_per_group, features, r):
    N = groups * entities_per_group
    defn = model_definition(N, [bb] * features)

    # generate fake data
    Y = np.random.random(size=(N, features)) <= 0.5
    view = numpy_dataview(
        np.array([tuple(y) for y in Y], dtype=[('', bool)] * features))

    # assign entities to their respective groups
    assignment = [[g] * entities_per_group for g in xrange(groups)]
    assignment = list(it.chain.from_iterable(assignment))

    latent = bind(initialize(defn, view, r, assignment=assignment), view)
    latent.create_group(r)  # perftest() doesn't modify group assignments

    return latent
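
A hypothetical call, sized along the lines of a perf run:

r = rng()
bound = latent(groups=10, entities_per_group=100, features=4, r=r)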
Example #19
def test_posterior_predictive():
    N, D = 10, 4  # D needs to be even
    defn = model_definition(N, [bb] * D)
    Y = toy_dataset(defn)
    prng = rng()
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng) for _ in xrange(10)]

    q = ma.masked_array(
        np.array([(False,) * D], dtype=[('', bool)] * D),
        mask=[(False,) * (D / 2) + (True,) * (D / 2)])
    samples = query.posterior_predictive(q, latents, prng)
    assert_equals(samples.shape, (1, len(latents)))

    q = ma.masked_array(
        np.array([(False,) * D] * 3, dtype=[('', bool)] * D),
        mask=[(False,) * (D / 2) + (True,) * (D / 2)] * 3)
    samples = query.posterior_predictive(q, latents, prng)
    assert_equals(samples.shape, (3, len(latents)))
Example #20
def _test_runner_kernel_config(kc_fn, models):
    defn = model_definition(10, models)
    Y = toy_dataset(defn)
    view = numpy_dataview(Y)
    kc = kc_fn(defn)
    prng = rng()

    ntries = 5
    while ntries:
        latent = model.initialize(defn, view, prng)
        assignments = latent.assignments()
        r = runner.runner(defn, view, latent, kc)
        r.run(r=prng, niters=10)
        assignments1 = r.get_latent().assignments()

        # XXX: it should be very unlikely the assignments are all equal
        if assignments == assignments1:
            ntries -= 1
        else:
            return  # success

    assert_true(False)  # exceeded ntries
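
A plausible driver, pairing the helper with the default kernel config and
the mixed model used elsewhere in these examples (the test name is
hypothetical):

def test_default_kernel_config():
    _test_runner_kernel_config(runner.default_kernel_config,
                               [bb, nich, niw(3)])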
Example #21
def test_mnist_supervised(n):
    mnist_dataset = _get_mnist_dataset()
    classes = range(10)
    classmap = {c: i for i, c in enumerate(classes)}
    train_data, test_data = [], []
    for c in classes:
        Y = mnist_dataset['data'][np.where(
            mnist_dataset['target'] == float(c))[0]]
        Y_train, Y_test = train_test_split(Y, test_size=0.01)
        train_data.append(Y_train)
        test_data.append(Y_test)

    sample_size_max = n

    def mk_class_data(c, Y):
        n, D = Y.shape
        print 'number of digit', c, 'in training is', n
        dtype = [('', bool)] * D + [('', int)]
        inds = np.random.permutation(Y.shape[0])[:sample_size_max]
        Y = np.array([tuple(list(y) + [classmap[c]]) for y in Y[inds]],
                     dtype=dtype)
        return Y

    Y_train = np.hstack(
        [mk_class_data(c, y) for c, y in zip(classes, train_data)])
    Y_train = Y_train[np.random.permutation(np.arange(Y_train.shape[0]))]

    n, = Y_train.shape
    D = len(Y_train.dtype)
    print 'training data is', n, 'examples'
    print 'image dimension is', (D - 1), 'pixels'

    view = numpy_dataview(Y_train)
    defn = model_definition(n, [bb] * (D - 1) + [dd(len(classes))])
    r = rng()
    s = initialize(defn,
                   view,
                   cluster_hp={'alpha': 0.2},
                   feature_hps=([{'alpha': 1., 'beta': 1.}] * (D - 1)
                                + [{'alphas': [1. for _ in classes]}]),
                   r=r)

    bound_s = bind(s, view)

    indiv_prior_fn = log_exponential(1.2)
    hparams = {
        i: {
            'alpha': (indiv_prior_fn, 1.5),
            'beta': (indiv_prior_fn, 1.5),
        }
        for i in xrange(D - 1)
    }
    hparams[D - 1] = {
        'alphas[{}]'.format(idx): (indiv_prior_fn, 1.5)
        for idx in xrange(len(classes))
    }

    def print_prediction_results():
        results = []
        for c, Y_test in zip(classes, test_data):
            for y in Y_test:
                query = ma.masked_array(
                    np.array([tuple(y) + (0, )],
                             dtype=[('', bool)] * (D - 1) + [('', int)]),
                    mask=[(False, ) * (D - 1) + (True, )])[0]
                samples = [
                    s.sample_post_pred(query, r)[1][0][-1] for _ in xrange(30)
                ]
                samples = np.bincount(samples, minlength=len(classes))
                prediction = np.argmax(samples)
                results.append((classmap[c], prediction, samples))
            print 'finished predictions for class', c

        Y_actual = np.array([a for a, _, _ in results], dtype=np.int)
        Y_pred = np.array([b for _, b, _ in results], dtype=np.int)
        print 'accuracy:', accuracy_score(Y_actual, Y_pred)
        print 'confusion matrix:'
        print confusion_matrix(Y_actual, Y_pred)

        # AUROC for one vs all (each class)
        for i, clabel in enumerate(classes):
            Y_true = np.copy(Y_actual)

            # treat class c as the "positive" example
            positive_examples = Y_actual == i
            negative_examples = Y_actual != i
            Y_true[positive_examples] = 1
            Y_true[negative_examples] = 0
            Y_prob = np.array([float(c[i]) / c.sum() for _, _, c in results])
            cls_auc = roc_auc_score(Y_true, Y_prob)
            print 'class', clabel, 'auc=', cls_auc

        #import matplotlib.pylab as plt
        #Y_prob = np.array([c for _, _, c in results])
        #fpr, tpr, thresholds = roc_curve(Y_actual, Y_prob, pos_label=0)
        #plt.plot(fpr, tpr)
        #plt.show()

    def kernel(rid):
        start0 = time.time()
        assign(bound_s, r)
        sec0 = time.time() - start0

        start1 = time.time()
        hp(bound_s, r, hparams=hparams)
        sec1 = time.time() - start1

        print 'rid=', rid, 'nclusters=', s.ngroups(), \
            'iter0=', sec0, 'sec', 'iter1=', sec1, 'sec'

        sec_per_post_pred = sec0 / (float(view.size()) * (float(s.ngroups())))
        print '  time_per_post_pred=', sec_per_post_pred, 'sec'

    # training
    iters = 30
    for rid in xrange(iters):
        kernel(rid)

    # print group size breakdown
    sizes = [(gid, s.groupsize(gid)) for gid in s.groups()]
    sizes = sorted(sizes, key=lambda x: x[1], reverse=True)
    print '  group_sizes=', sizes

    #print_prediction_results()

    # save state
    mkdirp("mnist-states")
    fname = os.path.join("mnist-states", "state-iter{}.ser".format(rid))
    with open(fname, "w") as fp:
        fp.write(s.serialize())
Example #23
def test_mnist():
    import matplotlib.pylab as plt
    from PIL import Image, ImageOps
    mnist_dataset = _get_mnist_dataset()
    Y_2 = mnist_dataset['data'][np.where(mnist_dataset['target'] == 2.)[0]]
    Y_3 = mnist_dataset['data'][np.where(mnist_dataset['target'] == 3.)[0]]
    print 'number of twos:', Y_2.shape[0]
    print 'number of threes:', Y_3.shape[0]
    _, D = Y_2.shape
    W = int(math.sqrt(D))
    assert W * W == D
    dtype = [('', bool)] * D
    Y = np.vstack([Y_2, Y_3])
    Y = np.array(
        [tuple(y) for y in Y[np.random.permutation(np.arange(Y.shape[0]))]],
        dtype=dtype)

    view = numpy_dataview(Y)
    defn = model_definition(Y.shape[0], [bb] * D)
    r = rng()
    s = initialize(
        defn,
        view,
        cluster_hp={'alpha': 0.2},
        feature_hps=[{'alpha': 1., 'beta': 1.}] * D,
        r=r)
    bound_s = bind(s, view)

    indiv_prior_fn = log_exponential(1.2)
    hparams = {
        i: {
            'alpha': (indiv_prior_fn, 1.5),
            'beta': (indiv_prior_fn, 1.5),
        } for i in xrange(D)}

    def plot_clusters(s, fname, scalebysize=False):
        hps = [s.get_feature_hp(i) for i in xrange(D)]

        def prior_prob(hp):
            return hp['alpha'] / (hp['alpha'] + hp['beta'])

        def data_for_group(gid):
            suffstats = [s.get_suffstats(gid, i) for i in xrange(D)]

            def prob(hp, ss):
                top = hp['alpha'] + ss['heads']
                bot = top + hp['beta'] + ss['tails']
                return top / bot
            probs = [prob(hp, ss) for hp, ss in zip(hps, suffstats)]
            return np.array(probs)

        def scale(d, weight):
            im = d.reshape((W, W))
            newW = max(int(weight * W), 1)
            im = Image.fromarray(im)
            im = im.resize((newW, newW))
            im = ImageOps.expand(im, border=(W - newW) / 2)
            im = np.array(im)
            a, b = im.shape
            #print 'a,b:', a, b
            if a < W:
                im = np.append(im, np.zeros(b)[np.newaxis, :], axis=0)
            elif a > W:
                im = im[:W, :]
            assert im.shape[0] == W
            if b < W:
                #print 'current:', im.shape
                im = np.append(im, np.zeros(W)[:, np.newaxis], axis=1)
            elif b > W:
                im = im[:, :W]
            assert im.shape[1] == W
            return im.flatten()

        data = [(data_for_group(g), cnt) for g, cnt in groupsbysize(s)]
        largest = max(cnt for _, cnt in data)
        data = [scale(d, cnt / float(largest)) if scalebysize else d
                for d, cnt in data]
        digits_per_row = 12
        rem = len(data) % digits_per_row
        if rem:
            fill = digits_per_row - rem
            for _ in xrange(fill):
                data.append(np.zeros(D))
        assert not (len(data) % digits_per_row)
        #rows = len(data) / digits_per_row
        data = np.vstack([np.hstack([d.reshape((W, W))
                         for d in data[i:i + digits_per_row]])
                         for i in xrange(0, len(data), digits_per_row)])
        #print 'saving figure', fname
        plt.imshow(data, cmap=plt.cm.binary, interpolation='nearest')
        plt.savefig(fname)
        plt.close()

    def plot_hyperparams(s, fname):
        hps = [s.get_feature_hp(i) for i in xrange(D)]
        alphas = np.array([hp['alpha'] for hp in hps])
        betas = np.array([hp['beta'] for hp in hps])
        data = np.hstack([alphas.reshape((W, W)), betas.reshape((W, W))])
        plt.imshow(data, interpolation='nearest')
        plt.colorbar()
        plt.savefig(fname)
        plt.close()

    def kernel(rid):
        start0 = time.time()
        assign(bound_s, r)
        sec0 = time.time() - start0

        start1 = time.time()
        hp(bound_s, r, hparams=hparams)
        sec1 = time.time() - start1

        print 'rid=', rid, 'nclusters=', s.ngroups(), \
            'iter0=', sec0, 'sec', 'iter1=', sec1, 'sec'

        sec_per_post_pred = sec0 / (float(view.size()) * (float(s.ngroups())))
        print '  time_per_post_pred=', sec_per_post_pred, 'sec'

        return s.score_joint(r)

    # burnin
    burnin = 20
    for rid in xrange(burnin):
        print 'score:', kernel(rid)
    print 'finished burnin'
    plot_clusters(s, 'mnist_clusters.pdf')
    plot_clusters(s, 'mnist_clusters_bysize.pdf', scalebysize=True)
    plot_hyperparams(s, 'mnist_hyperparams.pdf')
    print 'groupcounts:', groupcounts(s)

    # posterior predictions
    present = D / 2
    absent = D - present
    queries = [tuple(Y_2[i]) for i in np.random.permutation(Y_2.shape[0])[:4]] + \
              [tuple(Y_3[i]) for i in np.random.permutation(Y_3.shape[0])[:4]]

    queries_masked = ma.masked_array(
        np.array(queries, dtype=[('', bool)] * D),
        mask=[(False,) * present + (True,) * absent])

    def postpred_sample(y_new):
        Y_samples = [s.sample_post_pred(y_new, r)[1] for _ in xrange(1000)]
        Y_samples = np.array([list(y) for y in np.hstack(Y_samples)])
        Y_avg = Y_samples.mean(axis=0)
        return Y_avg

    queries_masked = [postpred_sample(y) for y in queries_masked]
    data0 = np.hstack([q.reshape((W, W)) for q in queries_masked])
    data1 = np.hstack(
        [np.clip(np.array(q, dtype=np.float), 0., 1.).reshape((W, W))
         for q in queries])
    data = np.vstack([data0, data1])
    plt.imshow(data, cmap=plt.cm.binary, interpolation='nearest')
    plt.savefig('mnist_predict.pdf')
    plt.close()
Example #24
def test_mnist_supervised():
    mnist_dataset = _get_mnist_dataset()
    classes = range(10)
    classmap = {c: i for i, c in enumerate(classes)}
    train_data, test_data = [], []
    for c in classes:
        Y = mnist_dataset['data'][
            np.where(mnist_dataset['target'] == float(c))[0]]
        Y_train, Y_test = train_test_split(Y, test_size=0.01)
        train_data.append(Y_train)
        test_data.append(Y_test)

    sample_size_max = 10000

    def mk_class_data(c, Y):
        n, D = Y.shape
        print 'number of digit', c, 'in training is', n
        dtype = [('', bool)] * D + [('', int)]
        inds = np.random.permutation(Y.shape[0])[:sample_size_max]
        Y = np.array([tuple(list(y) + [classmap[c]]) for y in Y[inds]],
                     dtype=dtype)
        return Y
    Y_train = np.hstack([mk_class_data(c, y)
                         for c, y in zip(classes, train_data)])
    Y_train = Y_train[np.random.permutation(np.arange(Y_train.shape[0]))]

    n, = Y_train.shape
    D = len(Y_train.dtype)
    print 'training data is', n, 'examples'
    print 'image dimension is', (D - 1), 'pixels'

    view = numpy_dataview(Y_train)
    defn = model_definition(n, [bb] * (D - 1) + [dd(len(classes))])
    r = rng()
    s = initialize(defn,
                   view,
                   cluster_hp={'alpha': 0.2},
                   feature_hps=[{'alpha': 1., 'beta': 1.}] *
                   (D - 1) + [{'alphas': [1. for _ in classes]}],
                   r=r)

    bound_s = bind(s, view)

    indiv_prior_fn = log_exponential(1.2)
    hparams = {
        i: {
            'alpha': (indiv_prior_fn, 1.5),
            'beta': (indiv_prior_fn, 1.5),
        } for i in xrange(D - 1)}
    hparams[D - 1] = {
        'alphas[{}]'.format(idx): (indiv_prior_fn, 1.5)
        for idx in xrange(len(classes))
    }

    def print_prediction_results():
        results = []
        for c, Y_test in zip(classes, test_data):
            for y in Y_test:
                query = ma.masked_array(
                    np.array([tuple(y) + (0,)],
                             dtype=[('', bool)] * (D - 1) + [('', int)]),
                    mask=[(False,) * (D - 1) + (True,)])[0]
                samples = [
                    s.sample_post_pred(query, r)[1][0][-1] for _ in xrange(30)]
                samples = np.bincount(samples, minlength=len(classes))
                prediction = np.argmax(samples)
                results.append((classmap[c], prediction, samples))
            print 'finished predictions for class', c

        Y_actual = np.array([a for a, _, _ in results], dtype=np.int)
        Y_pred = np.array([b for _, b, _ in results], dtype=np.int)
        print 'accuracy:', accuracy_score(Y_actual, Y_pred)
        print 'confusion matrix:'
        print confusion_matrix(Y_actual, Y_pred)

        # AUROC for one vs all (each class)
        for i, clabel in enumerate(classes):
            Y_true = np.copy(Y_actual)

            # treat class c as the "positive" example
            positive_examples = Y_actual == i
            negative_examples = Y_actual != i
            Y_true[positive_examples] = 1
            Y_true[negative_examples] = 0
            Y_prob = np.array([float(c[i]) / c.sum() for _, _, c in results])
            cls_auc = roc_auc_score(Y_true, Y_prob)
            print 'class', clabel, 'auc=', cls_auc

        #import matplotlib.pylab as plt
        #Y_prob = np.array([c for _, _, c in results])
        #fpr, tpr, thresholds = roc_curve(Y_actual, Y_prob, pos_label=0)
        #plt.plot(fpr, tpr)
        #plt.show()

    def kernel(rid):
        start0 = time.time()
        assign(bound_s, r)
        sec0 = time.time() - start0

        start1 = time.time()
        hp(bound_s, r, hparams=hparams)
        sec1 = time.time() - start1

        print 'rid=', rid, 'nclusters=', s.ngroups(), \
            'iter0=', sec0, 'sec', 'iter1=', sec1, 'sec'

        sec_per_post_pred = sec0 / (float(view.size()) * (float(s.ngroups())))
        print '  time_per_post_pred=', sec_per_post_pred, 'sec'

        # print group size breakdown
        sizes = [(gid, s.groupsize(gid)) for gid in s.groups()]
        sizes = sorted(sizes, key=lambda x: x[1], reverse=True)
        print '  group_sizes=', sizes

        print_prediction_results()

        # save state
        mkdirp("mnist-states")
        fname = os.path.join("mnist-states", "state-iter{}.ser".format(rid))
        with open(fname, "w") as fp:
            fp.write(s.serialize())

    # training
    iters = 30
    for rid in xrange(iters):
        kernel(rid)