def test_sampled_data(self):
    """Test HDPM with uncertainty on sampled data sets of different sizes.

    Every case is an ``(F, K, G, average_n_g)`` tuple.  For each one the
    numpy RNG is seeded with the 1-based case number, a data set is drawn
    via ``U.sample_rho`` and ``U.sample``, and a variational distribution
    built on that data is passed to ``test_log_likelihood_per_update``.
    """
    cases = (
        (2, 10, 20, 5),
        (2, 4, 20, 10),
        (2, 10, 20, 10),
        (2, 10, 220, 50),
        (12, 80, 100, 50),
        (80, 6, 200, 200),
    )
    for seed, (F, K, G, average_n_g) in enumerate(cases, start=1):
        # Seed before any sampling; presumably this keeps each case's
        # sampled data reproducible from run to run.
        numpy.random.seed(seed)
        logging.debug(
            'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d',
            F, K, G, average_n_g)

        # Default options with a_tau and a_omega each set to ones(F) / F.
        # Two separate arrays are created on purpose so the attributes
        # never share storage.
        options = U.get_default_options()
        options.a_tau = numpy.ones(F) / F
        options.a_omega = numpy.ones(F) / F

        rho = U.sample_rho(G, average_n_g=average_n_g)
        sampled = U.sample(options, rho, K, F)
        genes = U.genes_from_sites(sampled.sites, rho)
        data = U.Data(genes, F, options)

        test_log_likelihood_per_update(U.VariationalDistribution(data, K))
# Script section: sample one fixed-size data set, build a variational
# distribution on it, summarise it, then run up to max_iters updates while
# recording history.

# Emit DEBUG-level (and above) log records.
logging.basicConfig(level=logging.DEBUG)

# Same (F, K, G, average_n_g) sizes as one of the test_sampled_data cases,
# but with its own seed and with all-ones a_tau / a_omega (not ones / F).
F, K, G, average_n_g = (12, 80, 100, 50)
numpy.random.seed(2)
logging.debug(
    'Testing sampled data with F=%d; K=%d; G=%d, average n_g=%d',
    F, K, G, average_n_g)

# Start from the default options and override a_tau / a_omega.
options = U.get_default_options()
options.a_tau = numpy.ones(F)
options.a_omega = numpy.ones(F)

# Sample rho, then a data set from it, and wrap the result for inference.
rho = U.sample_rho(G, average_n_g=average_n_g)
sample = U.sample(options, rho, K, F)
genes = U.genes_from_sites(sample.sites, rho)
data = U.Data(genes, F, options)
dist = U.VariationalDistribution(data, K)

# Summarise the distribution before any update has been applied.
# NOTE(review): presumably writes under 'output/sampled/summary' -- confirm
# against Summariser.
Summariser(dist, 'output/sampled/summary').summarise_all()
history = InferenceHistory(dist)

# Log likelihood before the first update.
LL = dist.log_likelihood()

# The tolerance scales with data.N and is used as an absolute difference
# (see use_absolute_difference=True below).
LL_tolerance = 1e-8 * data.N
logging.info('Tolerance in LL: %e', LL_tolerance)
max_iters = 50
convergence_test = LlConvergenceTest(
    eps=LL_tolerance, use_absolute_difference=True)

# NOTE(review): LL and convergence_test are not used in the visible part of
# this loop; the loop body may continue beyond what is shown here.
for i in range(max_iters):
    dist.update()
    history.update()