def test_hand_made_data_set(self):
    """Test HDPM with uncertainty on small hand-crafted data set.

    Seeds numpy's global RNG, builds a list of three hand-written genes,
    wraps it in ``U.Data`` (F=12, default options), constructs a
    ``U.VariationalDistribution`` with K=5, hands that distribution to
    ``test_log_likelihood_per_update`` and finally logs a ``Statistics``
    object built from it.
    """
    # Seed before touching the model so any randomness is reproducible.
    numpy.random.seed(37)

    F = 12
    K = 5

    # Each gene is a list of (int, [float, ...]) pairs.
    # NOTE(review): presumably (index, per-site probabilities) - confirm
    # against what U.Data expects.
    first_gene = [
        (0, [0.1, 0.2, 0.99]),
    ]
    second_gene = [
        (3, [0.9, 0.9]),
        (2, [0.2, 0.99]),
        (4, [0.2, 0.99]),
        (5, [0.2, 0.99]),
    ]
    third_gene = [
        (0, [0.1, 0.2, 0.99]),
        (2, [0.1, 0.2]),
        (10, [0.1, 0.2, 0.99]),
        (11, [0.1, 0.2]),
        (9, [0.1, 0.2]),
    ]
    hand_made_genes = [first_gene, second_gene, third_gene]

    dataset = U.Data(hand_made_genes, F, U.get_default_options())
    variational = U.VariationalDistribution(dataset, K)

    test_log_likelihood_per_update(variational)

    Statistics(variational).log()
def test_sampled_data(self):
    """Test HDPM with uncertainty on sampled data sets of different sizes.

    For every (F, K, G, average n_g) scenario below, numpy's global RNG is
    seeded with the scenario's 1-based position, a data set is sampled via
    ``U.sample_rho`` / ``U.sample`` / ``U.genes_from_sites`` and a freshly
    constructed ``U.VariationalDistribution`` is passed to
    ``test_log_likelihood_per_update``.
    """
    # One row per scenario: (F, K, G, average_n_g).
    scenarios = (
        (2, 10, 20, 5),
        (2, 4, 20, 10),
        (2, 10, 20, 10),
        (2, 10, 220, 50),
        (12, 80, 100, 50),
        (80, 6, 200, 200),
    )

    for position, scenario in enumerate(scenarios):
        F, K, G, average_n_g = scenario

        # A distinct, deterministic seed per scenario (1, 2, 3, ...).
        seed = position + 1
        numpy.random.seed(seed)

        logging.debug(
            'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d',
            F,
            K,
            G,
            average_n_g,
        )

        # Default options with a_tau and a_omega each replaced by a
        # length-F vector whose entries are all 1/F (two separate arrays).
        options = U.get_default_options()
        options.a_tau = numpy.ones(F) / F
        options.a_omega = numpy.ones(F) / F

        # Sample a synthetic data set under those options.
        rho = U.sample_rho(G, average_n_g=average_n_g)
        drawn = U.sample(options, rho, K, F)
        sampled_genes = U.genes_from_sites(drawn.sites, rho)

        # Build the variational distribution and run the shared check.
        dataset = U.Data(sampled_genes, F, options)
        variational = U.VariationalDistribution(dataset, K)
        test_log_likelihood_per_update(variational)
# Script fragment: reload the uncertainty model modules, sample one data set
# and set up the objects (summariser, inference history, log-likelihood
# tolerance) used by whatever follows.
#
# NOTE(review): relies on `infpy`, `logging` and `numpy` already being bound
# by code above this fragment - not visible here.
# NOTE(review): bare reload() is a builtin only under Python 2 (Python 3 has
# importlib.reload) - confirm the target interpreter.

# Reload before (re-)importing so the names bound below come from the
# freshly reloaded modules.
reload(infpy.dp.hdpm.uncertainty)
import infpy.dp.hdpm.uncertainty as U
import infpy.dp.hdpm.uncertainty.summarise
reload(infpy.dp.hdpm.uncertainty.summarise)
from infpy.dp.hdpm.uncertainty.summarise import Statistics, InferenceHistory, Summariser
# LlConvergenceTest is not used in the visible part of this fragment.
from infpy.convergence_test import LlConvergenceTest

# Emit DEBUG-level messages (including the one logged below).
logging.basicConfig(level=logging.DEBUG)

# Single scenario; same sizes as the (12, 80, 100, 50) row in
# test_sampled_data, but seeded with 2 here.
F, K, G, average_n_g = (12, 80, 100, 50)
numpy.random.seed(2)
logging.debug(
    'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d',
    F, K, G, average_n_g)

# Default options with a_tau and a_omega replaced by length-F all-ones
# vectors (note: not divided by F, unlike test_sampled_data).
options = U.get_default_options()
options.a_tau = numpy.ones(F)
options.a_omega = numpy.ones(F)

# Sample a synthetic data set and wrap it for inference.
rho = U.sample_rho(G, average_n_g=average_n_g)
sample = U.sample(options, rho, K, F)
genes = U.genes_from_sites(sample.sites, rho)
data = U.Data(genes, F, options)
dist = U.VariationalDistribution(data, K)

# Summarise the freshly constructed distribution; 'output/sampled/summary'
# is passed straight to Summariser (presumably an output location - confirm).
Summariser(dist, 'output/sampled/summary').summarise_all()
history = InferenceHistory(dist)

# Starting log likelihood and a tolerance proportional to data.N
# (presumably the data set size - confirm against U.Data).
LL = dist.log_likelihood()
LL_tolerance = 1e-8 * data.N