Exemplo n.º 1
0
def main(G=5000000,iterations=50000,init_matrix=None,init_mu=None,verbose=True):
    """Test case for FD-inference.

    Builds a synthetic dataset from TRUE_ENERGY_MATRIX on a random genome,
    then runs the ``mh`` sampler from an initial (matrix, mu) guess against
    that dataset.

    Args:
        G: genome length handed to ``random_site`` and ``map_reads_np``.
        iterations: iteration count forwarded to ``mh``.
        init_matrix: starting energy matrix; when None,
            ``random_energy_matrix(w)`` is used.
        init_mu: starting mu; when None, -20 is used.
        verbose: forwarded to ``mh``.

    Returns:
        Tuple ``(matrix_chain, genome, mapped_reads)`` where ``matrix_chain``
        is whatever ``mh`` returns.

    Note:
        ``q`` and ``w`` are free names here -- presumably module-level
        globals (target copy number and motif width); verify against the
        rest of the file.
    """
    print("generating genome")
    genome = random_site(G)
    print("generating eps")
    eps = score_genome_np(TRUE_ENERGY_MATRIX, genome)

    # Bisect for the mu at which the summed fd_solve_np output equals q.
    # NOTE(review): verbose is hard-coded to True here rather than forwarding
    # the ``verbose`` argument -- confirm this is intended.
    min_mu, max_mu = -40, 0
    mu = bisect_interval(
        lambda trial_mu: np.sum(fd_solve_np(eps, trial_mu)) - q,
        min_mu,
        max_mu,
        verbose=True,
        tolerance=1e-1,
    )

    print("computing ps")
    true_ps = fd_solve_np(eps, mu)
    print("true q: %s" % (np.sum(true_ps),))

    print("generating chip dataset")
    true_chip = chip_ps_np(true_ps, MEAN_FRAGMENT_LENGTH, NUM_CELLS_ORIGINAL)
    mapped_reads = np.array(map_reads_np(true_chip, G))
    print("finished chip dataset")

    if init_matrix is None:
        init_matrix = random_energy_matrix(w)
    if init_mu is None:
        # Fixed starting point (a random draw was used at one time).
        init_mu = -20
    init_scores = score_genome_np(init_matrix, genome)
    # Sampler state layout: ((matrix, mu), per-site scores).
    init_state = ((init_matrix, init_mu), init_scores)

    logf = lambda state: complete_log_likelihood(state, mapped_reads)
    print("true mu: %s" % (mu,))
    # Evaluate before printing: logf emits its own diagnostics, which would
    # otherwise be written between the label and the value.
    true_log_likelihood = logf(((TRUE_ENERGY_MATRIX, mu), eps))
    print("true log_likelihood: %s" % (true_log_likelihood,))

    rprop = lambda state: complete_rprop(state, genome)
    print("hitting mh loop")
    matrix_chain = mh(
        logf,
        proposal=rprop,
        x0=init_state,
        dprop=log_dprop,
        capture_state=capture_state,
        verbose=verbose,
        use_log=True,
        iterations=iterations,
        modulus=100,
    )
    return matrix_chain, genome, mapped_reads
Exemplo n.º 2
0
    
def capture_state(state):
    """Return the ``mat_and_mu`` half of a sampler state.

    Args:
        state: a 2-item sequence ``(mat_and_mu, site_scores)``.

    Returns:
        The first item, ``mat_and_mu``; the site scores are discarded.

    Raises:
        ValueError/TypeError: if ``state`` cannot be unpacked into two items.
    """
    # Unpack in the body instead of the signature: tuple parameters were
    # removed from the language in Python 3 (PEP 3113).
    mat_and_mu, _site_scores = state
    return mat_and_mu

def complete_log_likelihood(state, mapped_reads, num_cells=NUM_CELLS_RECOVERED):
    """Compute log likelihood of matrix, given chip seq data.

    Args:
        state: nested pair ``((matrix, mu), eps)``. ``eps`` and ``mu`` are
            passed to ``fd_solve_np``; ``matrix`` is only pretty-printed.
        mapped_reads: observed dataset, passed to ``chip_seq_log_likelihood``.
        num_cells: cell count used to build the predicted dataset and as the
            denominator of the pseudocounted coverage probability.

    Returns:
        The value returned by ``chip_seq_log_likelihood``.

    Side effects:
        Prints progress and diagnostic information to stdout on every call.
    """
    # Unpack in the body: tuple parameters in the signature are not valid
    # syntax on Python 3 (PEP 3113).
    (matrix, mu), eps = state
    print("entering complete log likelihood")

    # Pad with w - 1 zeros. ``w`` is a free name -- presumably the
    # module-level motif width; verify against the rest of the file.
    ps = np.append(fd_solve_np(eps, mu), [0] * (w - 1))
    G = len(ps)
    print("mean copy number: %s mu: %s" % (np.sum(ps), mu))

    proposed_reads = map_reads_np(chip_ps_np(ps, MEAN_FRAGMENT_LENGTH, num_cells), G)
    # add laplacian pseudocount: one observation of hit and miss each
    predicted_coverage_probability = (
        (np.array(proposed_reads, dtype=float) + 1) / (num_cells + 2)
    )
    ans = chip_seq_log_likelihood(
        predicted_coverage_probability, mapped_reads, NUM_CELLS_ORIGINAL
    )

    pprint(matrix)
    print("mu: %s" % (mu,))
    print("log likelihood: %s" % (ans,))
    print("returning from complete log likelihood")
    return ans

def log_likelihood_from_state(matrix,mu,genome,mapped_reads,num_cells=NUM_CELLS_RECOVERED):
        eps = score_genome_np(matrix,genome)