def test_pickle(self):
    """Models serialized to disk and loaded back should keep all parameters.

    Covers both a plain Mixture (with GSM components added manually) and a
    MoGSM, round-tripping each through pickle and comparing every component's
    parameters.
    """
    models = [
        Mixture(dim=5),
        MoGSM(dim=3, num_components=4, num_scales=7)]

    # give the plain mixture some components to serialize
    for _ in range(3):
        models[0].add_component(GSM(models[0].dim, 7))

    for model0 in models:
        tmp_file = mkstemp()[1]

        # store model
        with open(tmp_file, 'w') as handle:
            dump({'model': model0}, handle)

        # load model
        with open(tmp_file) as handle:
            model1 = load(handle)['model']

        # make sure parameters haven't changed
        self.assertEqual(model0.dim, model1.dim)
        self.assertEqual(model0.num_components, model1.num_components)

        for k in range(model0.num_components):
            # compare stored against loaded parameters; the original compared
            # model0's scales with themselves, which is trivially zero
            self.assertLess(max(abs(model0[k].scales - model1[k].scales)), 1e-10)
            self.assertLess(max(abs(model0[k].priors - model1[k].priors)), 1e-10)
            self.assertLess(max(abs(model0[k].mean - model1[k].mean)), 1e-10)
            self.assertLess(max(abs(model0[k].covariance - model1[k].covariance)), 1e-10)
def main(argv):
    """Fit a MoGSM to the DC components of image patches and report likelihoods.

    Loads training and test patches from MAT files, extracts their DC
    components, trains a one-dimensional MoGSM on them, saves a plot of the
    fit, and prints the model and histogram log-likelihoods plus the
    log-Jacobian of the DC transform.
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data_train', '-d', type=str, default='data/BSDS300_train.mat')
    parser.add_argument('--data_test', '-t', type=str, default='data/BSDS300_test.mat')
    parser.add_argument('--patch_size', '-p', type=int, default=8)

    args = parser.parse_args(argv[1:])

    # linear transform whose last row extracts the DC (average) component;
    # its log-Jacobian accounts for the change of variables in the likelihood
    # NOTE(review): A is patch_size x patch_size while the 1/patch_size**2
    # terms suggest patch_size**2 pixels per patch — confirm dimensions
    A = eye(args.patch_size) - 1. / args.patch_size**2
    A[-1] = 1. / args.patch_size**2
    logjacobian = slogdet(A)[1]

    data_train = loadmat(args.data_train)['data']
    data_test = loadmat(args.data_test)['data']

    # extract DC components of patches
    dc_train = dc_component(data_train, args.patch_size)
    dc_test = dc_component(data_test, args.patch_size)

    # histogram baseline: bin edges fitted on training data, counts on test data
    h_train, bins = histogram(dc_train, 60, density=True)
    h_test, bins = histogram(dc_test, bins, density=False)

    # fit a univariate mixture of GSMs to the DC components
    model = MoGSM(dim=1, num_components=16, num_scales=4)
    model.train(dc_train, parameters={'max_iter': 100})

    # visualize fit against the empirical distribution
    figure(sans_serif=True)
    t = linspace(0, 1, 100)
    hist(dc_train.ravel(), 100, density=True)
    plot(t, exp(model.loglikelihood(t[None]).ravel()), 'k', line_width=2)
    axis(width=5, height=5)
    savefig('dc_fit.tex')

    # average test log-likelihood under the model
    loglik = mean(model.loglikelihood(dc_test))

    print 'Add these two numbers to your results:'
    print 'Log-likelihood (MoGSM): {0:.4f} [nat]'.format(loglik)
    # histogram log-likelihood: test counts weighted by log of training density
    print 'Log-likelihood (histogram): {0:.4f} [nat]'.format(
        sum(h_test * log(h_train)) / sum(h_test))
    print 'Log-Jacobian: {0:.4f} [nat]'.format(logjacobian)

    return 0
def test_basics(self):
    """Sampling frequencies of each component should be consistent with its prior."""
    model = MoGSM(1, 4, 1)

    # strictly increasing prior weights, normalized to sum to one
    model.priors = arange(model.num_components) + 1.
    model.priors = model.priors / sum(model.priors)

    # put component means at the integers 0..K-1 with very small variance,
    # so rounding a sample recovers the index of the component it came from
    for index in range(model.num_components):
        model[index].mean = [[index]]
        model[index].scales = [[1000.]]

    num_samples = 1000
    labels = asarray(model.sample(num_samples) + .5, dtype=int)

    # binomial test: the count of samples from each component should not be
    # in either extreme tail of Binomial(num_samples, prior)
    for index in range(model.num_components):
        prob = model.priors.ravel()[index]
        count = sum(labels == index)
        cdf_value = binom.cdf(count, num_samples, prob)
        self.assertGreater(cdf_value, 1e-5)
        self.assertGreater(1. - cdf_value, 1e-5)
def test_mogsm(self):
    """An MCGSM with zero-dimensional inputs should be equivalent to a MoGSM.

    Trains an input-less MCGSM on data from a scaled two-component Gaussian
    mixture, copies its parameters into a MoGSM, and checks that likelihoods,
    sample distributions, and posteriors agree.
    """
    mcgsm = MCGSM(
        dim_in=0,
        dim_out=3,
        num_components=2,
        num_scales=2,
        num_features=0)

    p0 = 0.3
    p1 = 0.7
    N = 20000
    m0 = array([[2], [0], [0]])
    m1 = array([[0], [2], [1]])
    C0 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))
    C1 = cov(randn(mcgsm.dim_out, mcgsm.dim_out**2))

    input = zeros([0, N])
    # two Gaussian clusters, multiplied by random scales to create a
    # Gaussian scale mixture; int(...) makes the counts valid array shapes
    output = hstack([
        dot(cholesky(C0), randn(mcgsm.dim_out, int(round(p0 * N)))) + m0,
        dot(cholesky(C1), randn(mcgsm.dim_out, int(round(p1 * N)))) + m1
    ]) * (rand(1, N) + 0.5)

    mcgsm.train(input, output, parameters={
        'verbosity': 0,
        'max_iter': 10,
        'train_means': True})

    mogsm = MoGSM(3, 2, 2)

    # translate parameters from MCGSM to MoGSM
    mogsm.priors = sum(exp(mcgsm.priors), 1) / sum(exp(mcgsm.priors))

    for k in range(mogsm.num_components):
        mogsm[k].mean = mcgsm.means[:, k]
        mogsm[k].covariance = inv(
            dot(mcgsm.cholesky_factors[k], mcgsm.cholesky_factors[k].T))
        mogsm[k].scales = exp(mcgsm.scales[k, :])
        mogsm[k].priors = exp(mcgsm.priors[k, :]) / sum(exp(mcgsm.priors[k, :]))

    self.assertAlmostEqual(mcgsm.evaluate(input, output), mogsm.evaluate(output), 5)

    mogsm_samples = mogsm.sample(N)
    mcgsm_samples = mcgsm.sample(input)

    # generated samples should have the same distribution in each dimension;
    # ks_2samp returns (statistic, p-value), so test the p-value — the
    # original compared the whole tuple against a float, which is vacuously
    # true under Python 2's mixed-type ordering and never tested anything
    for i in range(mogsm.dim):
        self.assertGreater(
            ks_2samp(mogsm_samples[i], mcgsm_samples[i])[1], 0.0001)

    posterior = mcgsm.posterior(input, mcgsm_samples)

    # average posterior should correspond to prior
    for k in range(mogsm.num_components):
        self.assertLess(abs(1 - mean(posterior[k]) / mogsm.priors[k]), 0.1)
def main(argv):
    """Train one conditional model per patch pixel for autoregressive patch modeling.

    For each pixel index i, fits an MCGSM predicting pixel i from pixels 0..i-1
    (after whitening), except the first pixel, which gets an unconditional
    MoGSM. Models and preconditioners are checkpointed to disk after every
    pixel, and the full set is saved at the end unless only a subset of
    indices was trained.
    """
    parser = ArgumentParser(argv[0], description=__doc__)
    parser.add_argument('--data', '-d', type=str, default='data/BSDS300_8x8.mat')
    parser.add_argument('--num_train', '-N', type=int, default=1000000)
    parser.add_argument('--num_valid', '-V', type=int, default=200000)
    parser.add_argument('--num_components', '-n', type=int, default=128)
    parser.add_argument('--num_scales', '-s', type=int, default=4)
    parser.add_argument('--num_features', '-f', type=int, default=48)
    parser.add_argument('--train_means', '-M', type=int, default=0)
    parser.add_argument('--indices', '-I', type=int, default=[], nargs='+')
    parser.add_argument('--initialize', '-i', type=str, default=None)
    parser.add_argument('--verbosity', '-v', type=int, default=1)
    parser.add_argument('--max_iter', '-m', type=int, default=2000)

    args = parser.parse_args(argv[1:])

    experiment = Experiment()

    # rows are patches, columns are pixels — one model is trained per column
    data_train = loadmat(args.data)['patches_train']
    data_valid = loadmat(args.data)['patches_valid']

    if args.initialize:
        # warm-start from a previously saved experiment
        results = Experiment(args.initialize)
        models = results['models']
        preconditioners = results['preconditioners']
    else:
        models = [None] * data_train.shape[1]
        preconditioners = [None] * data_train.shape[1]

    def preprocess(data, i, N):
        # split patches into (preceding pixels, pixel i); optionally
        # subsample N rows; both returned arrays are pixels-by-samples
        if N > 0 and N < data.shape[0]:
            # select subset of data
            idx = random_select(N, data.shape[0])
            return data[idx, :i].T, data[idx, i][None, :]
        return data.T[:i], data.T[[i]]

    for i in range(data_train.shape[1]):
        if args.indices and i not in args.indices:
            # skip this one
            continue

        print 'Training model {0}/{1}...'.format(i + 1, data_train.shape[1])

        inputs_train, outputs_train = preprocess(data_train, i, args.num_train)
        inputs_valid, outputs_valid = preprocess(data_valid, i, args.num_valid)

        if i > 0:
            # conditional case: whiten inputs/outputs, then fit an MCGSM
            if preconditioners[i] is None:
                preconditioners[i] = WhiteningPreconditioner(
                    inputs_train, outputs_train)
            inputs_train, outputs_train = preconditioners[i](inputs_train, outputs_train)
            inputs_valid, outputs_valid = preconditioners[i](inputs_valid, outputs_valid)

            if models[i] is None:
                models[i] = MCGSM(
                    dim_in=i,
                    dim_out=1,
                    num_components=args.num_components,
                    num_features=args.num_features,
                    num_scales=args.num_scales)
            # validation data enables early stopping during training
            models[i].train(
                inputs_train, outputs_train,
                inputs_valid, outputs_valid,
                parameters={
                    'verbosity': 1,
                    'max_iter': args.max_iter,
                    'train_means': args.train_means > 0
                })
        else:
            # first pixel has no predecessors: fit an unconditional MoGSM
            preconditioners[i] = None
            if models[i] is None:
                models[i] = MoGSM(dim=1, num_components=4, num_scales=8)
            models[i].train(
                outputs_train,
                outputs_valid,
                parameters={
                    'verbosity': 1,
                    'threshold': -1.,
                    'train_means': 1,
                    'max_iter': 100
                })

        # checkpoint after every pixel so long runs can be resumed
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save(
            'results/BSDS300/snapshots/mcgsm_{0}_{1}.{{0}}.{{1}}.xpck'.format(
                i, args.num_components))

    if not args.indices:
        # all pixels were trained — save the complete result set
        experiment['args'] = args
        experiment['models'] = models
        experiment['preconditioners'] = preconditioners
        experiment.save('results/BSDS300/mcgsm.{0}.{1}.xpck')

    return 0