def test_log_marginal_likelyhood_exact():
    """Compare the scorer's log marginal likelihood with a directly evaluated formula.

    For every case a ``pasio.LogMarginalLikelyhoodComputer`` is built from the
    counts, ``alpha`` and ``beta`` (with ``None`` as the fourth argument), and
    its ``log_marginal_likelyhood()`` must agree, within ``np.allclose``
    tolerance, with ``exact_function`` evaluated on the same inputs.
    """

    def exact_function(counts, alpha, beta):
        """Evaluate the reference expression without any numerical shortcuts.

        Returns::

            log(beta**alpha * Gamma(cs + alpha)
                / (Gamma(alpha) * prod(c! for c in counts) * (ns + beta)**(cs + alpha)))

        where ``cs`` is the sum of ``counts`` and ``ns`` is their number.
        """
        # ``np.math`` was merely an alias of the stdlib ``math`` module; it was
        # deprecated in NumPy 1.25 and removed in NumPy 2.0, so call ``math``
        # directly. ``int()`` turns NumPy scalars into plain Python ints.
        counts_facproduct = functools.reduce(
            operator.mul, (math.factorial(int(count)) for count in counts), 1)
        cs = sum(counts)
        ns = len(counts)
        return np.log(
            (beta**alpha) * math.gamma(cs + alpha)
            / (math.gamma(alpha) * counts_facproduct * ((ns + beta)**(cs + alpha))))

    # (counts, alpha, beta) triples; kept small so the factorials and powers
    # in the reference expression stay well within range.
    cases = [
        ([0], 3, 5),
        ([0, 1], 3, 5),
        ([4, 0, 1, 3], 5, 2),
        ([4, 0, 1, 3], 1, 1),
    ]
    for values, alpha, beta in cases:
        # Separate arrays for the scorer and the reference, as two independent inputs.
        scorer = pasio.LogMarginalLikelyhoodComputer(np.array(values), alpha, beta, None)
        assert np.allclose(scorer.log_marginal_likelyhood(),
                           exact_function(np.array(values), alpha, beta))
def test_stat_split_into_segments_square():
    """Check ``pasio.SquareSplitter`` against a brute-force single-split search.

    Five random count vectors are drawn (100 Poisson(15) values followed by
    100 Poisson(20) values). For each one the splitter's result must score at
    least as well as the best single split, must contain that split point,
    and its reported score must match
    ``pasio.compute_score_from_splits`` for the returned splits.
    """

    def best_single_split(counts, scorer_factory):
        """Return ``(score, index)`` of the best one-point split.

        The score starts at that of the whole, unsplit vector with index 0;
        the index only changes when a split scores strictly higher.
        """
        scorer = scorer_factory(counts)
        top_score = scorer.score(0, len(counts))
        top_index = 0
        for index in range(len(counts)):
            candidate = scorer.score(stop=index) + scorer.score(start=index)
            if candidate > top_score:
                top_index, top_score = index, candidate
        return top_score, top_index

    np.random.seed(4)

    def scorer_factory(counts, split_candidates=None):
        return pasio.LogMarginalLikelyhoodComputer(
            counts, 1, 1, split_candidates)

    for _ in range(5):
        first_half = np.random.poisson(15, 100)
        second_half = np.random.poisson(20, 100)
        counts = np.concatenate([first_half, second_half])

        optimal_split = pasio.SquareSplitter().split(counts, scorer_factory)
        two_split = best_single_split(counts, scorer_factory)
        single_score, single_point = two_split[0], two_split[1]

        assert optimal_split[0] >= single_score
        assert single_point in optimal_split[1]
        recomputed = pasio.compute_score_from_splits(
            counts, optimal_split[1], scorer_factory)
        assert np.allclose(optimal_split[0], recomputed)

        # NOTE(review): the helper above always returns an integer index
        # (0 when no split improves the score), so the ``None`` case below
        # is never taken as the code stands.
        if single_point is not None:
            assert abs(single_point - 100) < 10
        else:
            assert optimal_split[1] == [0, 200]
def test_benchmark_log_marginal_likehood(benchmark):
    """Benchmark ``compute_log_marginal_likelyhood2`` on a 100-element count vector.

    The input is 50 Poisson(200) draws followed by 50 Poisson(20) draws,
    scored with ``pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)``.

    Args:
        benchmark: callable fixture that runs and times
            ``compute_log_marginal_likelyhood2(scorer, len(counts))``.
    """
    # Seed the generator so every run benchmarks the same data, as the
    # sibling segmentation benchmark does.
    np.random.seed(2)
    counts = np.concatenate(
        [np.random.poisson(200, 50), np.random.poisson(20, 50)])
    scorer = pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)
    # The benchmark's return value is not inspected; only the timing matters.
    benchmark(compute_log_marginal_likelyhood2, scorer, len(counts))
def test_benchmark_segmentation_long(benchmark):
    """Benchmark ``segmentation`` on a seeded 1000-element count vector.

    The input is 500 Poisson(15) draws followed by 500 Poisson(20) draws,
    generated after ``np.random.seed(2)``.

    Args:
        benchmark: callable fixture that runs and times
            ``segmentation(counts, scorer_factory)``.
    """
    np.random.seed(2)
    left_part = np.random.poisson(15, 500)
    right_part = np.random.poisson(20, 500)
    counts = np.concatenate([left_part, right_part])

    def scorer_factory(counts, split_candidates=None):
        return pasio.LogMarginalLikelyhoodComputer(
            counts, 1, 1, split_candidates)

    # Only the timing matters here; the returned value is not checked.
    benchmark(segmentation, counts, scorer_factory)
def test_suffixes_scores():
    """Check ``all_suffixes_self_score`` against per-suffix ``self_score`` calls.

    For a given ``stop``, ``scorer.all_suffixes_self_score(stop)`` must match
    the array ``[scorer.self_score(start, stop) for start in range(stop)]``.
    Verified on seeded random Poisson counts and on a small hand-written
    vector.
    """
    # Case 1: 200 random counts, suffixes ending at position 150.
    np.random.seed(2)
    lower = np.random.poisson(15, 100)
    upper = np.random.poisson(20, 100)
    counts = np.concatenate([lower, upper])
    scorer = pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)
    stop = 150
    expected = []
    for start in range(stop):
        expected.append(scorer.self_score(start, stop))
    assert np.allclose(scorer.all_suffixes_self_score(stop), np.array(expected))

    # Case 2: fixed counts, suffixes ending one position before the end.
    counts = np.array(
        [0, 0, 1, 0, 0, 2, 2, 2, 10, 11, 100, 1, 0, 0, 1, 0], dtype='int64')
    scorer = pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)
    stop = len(counts) - 1
    expected = []
    for start in range(stop):
        expected.append(scorer.self_score(start, stop))
    assert np.allclose(scorer.all_suffixes_self_score(stop), np.array(expected))
def test_suffixes_scores_with_candidates():
    """Check suffix scores of a scorer restricted to split candidates.

    A scorer built with ``split_candidates`` must return, from
    ``all_suffixes_self_score(len(candidates) - 1)``, the same values that an
    unrestricted scorer returns from ``all_suffixes_self_score(stop)`` when
    indexed at every candidate except the last one.
    """

    def check_candidates(counts, candidates, stop):
        """Compare the unrestricted and the candidate-restricted scorer."""
        full_scorer = pasio.LogMarginalLikelyhoodComputer(counts, 1, 1)
        restricted_scorer = pasio.LogMarginalLikelyhoodComputer(
            counts, 1, 1, split_candidates=candidates)
        # Drop the last candidate when indexing the full result.
        picked = full_scorer.all_suffixes_self_score(stop)[candidates[:-1]]
        restricted = restricted_scorer.all_suffixes_self_score(
            len(candidates) - 1)
        assert np.allclose(picked, restricted)

    np.random.seed(2)

    # Case 1: counts 1..9 with position 2 missing from the candidates.
    check_candidates(
        np.arange(1, 10),
        np.array([0, 1, 3, 4, 5, 6, 7, 8, 9]),
        9)

    # Case 2: seeded random counts with a sparse candidate set ending at 149.
    first_block = np.random.poisson(15, 100)
    second_block = np.random.poisson(20, 100)
    check_candidates(
        np.concatenate([first_block, second_block]),
        np.array([0, 1, 10, 20, 21, 30, 40, 149]),
        149)