def infer_synthetic_energy_model(num_reads=100000):
    """Infer an energy model from reads simulated under a known model.

    A fixed reference ("true") energy matrix and chemical potential are used
    to score the module-level ``genome``; reads are simulated from the result
    and converted to a read density.  ``mh`` is then run from a random
    starting matrix, scoring each state against that density with
    ``complete_log_likelihood``.

    Args:
        num_reads: number of reads to simulate; also forwarded to
            ``complete_log_likelihood`` as ``num_reads``.

    Returns:
        The value returned by ``mh`` for this run (the sampled chain).
    """
    G = len(genome)
    w = 10  # width (number of rows) of the energy matrix

    # Reference model that the sampler is expected to recover.
    true_matrix = [[-2, 0, 0, 0] for _ in range(w)]
    true_mu = -20
    true_eps = score_genome_np(true_matrix, genome)
    true_ps = fd_solve_np(true_eps, true_mu)

    MFL = 250  # mean fragment length, in bp
    # Derived from MFL so the two cannot drift apart; equals 1/250.0.
    lamb = 1.0 / MFL

    true_reads = reads_from_ps(true_ps, MFL, min_seq_len=75,
                               num_reads=num_reads)
    true_rdm = density_from_reads(true_reads, G)

    # Random starting point; the state carries the genome scores alongside
    # the (matrix, mu) pair.
    init_matrix = random_energy_matrix(w)
    init_mu = -20
    init_scores = score_genome_np(init_matrix, genome)
    init_state = ((init_matrix, init_mu), init_scores)

    def logf(state):
        # Log-likelihood of a state against the simulated read density,
        # passed through ``timestamp``.
        return timestamp(complete_log_likelihood(state, true_rdm, lamb,
                                                 num_reads=num_reads))

    def rprop(state):
        # Propose a new state from the current one.
        return complete_rprop(state, genome)

    verbose = True
    iterations = 50000

    # Report the log-likelihood of the reference model for comparison.
    # A single formatted string prints identically on Python 2 and 3.
    true_ll = logf(((true_matrix, true_mu), true_eps))
    print("true_ll: %s" % (true_ll,))

    matrix_chain = mh(logf,
                      proposal=rprop,
                      x0=init_state,
                      dprop=log_dprop,
                      capture_state=capture_state,
                      verbose=verbose,
                      use_log=True,
                      iterations=iterations,
                      modulus=100)
    return matrix_chain
def infer_arca_energy_model(num_reads=1000000):
    """Infer an energy model from reads supplied by ``get_arca_reads``.

    Fetches ``num_reads`` reads via ``get_arca_reads``, converts them to a
    read density over the module-level ``genome``, and runs ``mh`` from a
    random starting matrix, scoring each state against that density with
    ``complete_log_likelihood``.

    Args:
        num_reads: number of reads requested from ``get_arca_reads``; also
            forwarded to ``complete_log_likelihood`` as ``num_reads``.

    Returns:
        The value returned by ``mh`` for this run (the sampled chain).
    """
    true_reads = get_arca_reads(num_reads)
    G = len(genome)

    # NOTE(review): 250 is presumably the mean fragment length in bp, as in
    # infer_synthetic_energy_model -- confirm for this data set.
    MFL = 250
    lamb = 1.0 / MFL  # equals 1/250.0

    true_rdm = density_from_reads(true_reads, G)

    w = 10  # width (number of rows) of the energy matrix

    # Random starting point; the state carries the genome scores alongside
    # the (matrix, mu) pair.
    init_matrix = random_energy_matrix(w)
    init_mu = -20
    init_scores = score_genome_np(init_matrix, genome)
    init_state = ((init_matrix, init_mu), init_scores)

    def logf(state):
        # num_reads is passed by keyword, matching the call made in
        # infer_synthetic_energy_model, so it cannot bind to a different
        # positional parameter.
        return timestamp(complete_log_likelihood(state, true_rdm, lamb,
                                                 num_reads=num_reads))

    def rprop(state):
        # Propose a new state from the current one.
        return complete_rprop(state, genome)

    verbose = True
    iterations = 50000

    matrix_chain = mh(logf,
                      proposal=rprop,
                      x0=init_state,
                      dprop=log_dprop,
                      capture_state=capture_state,
                      verbose=verbose,
                      use_log=True,
                      iterations=iterations,
                      modulus=100)
    return matrix_chain