def main(G=5000000, iterations=50000, init_matrix=None, init_mu=None, verbose=True):
    """Test case for FD-inference.

    Builds a synthetic ChIP dataset from ``TRUE_ENERGY_MATRIX`` and then hands
    an initial ``(matrix, mu)`` guess to ``mh`` together with the likelihood
    and proposal functions defined in this module.

    Steps, in order:
      1. ``random_site(G)`` produces the genome; it is scored against
         ``TRUE_ENERGY_MATRIX`` to obtain ``eps``.
      2. ``bisect_interval`` searches ``[-40, 0]`` for the ``mu`` at which
         ``sum(fd_solve_np(eps, mu)) - q`` crosses zero (presumably a root
         find -- confirm against ``bisect_interval``).
      3. ``chip_ps_np`` / ``map_reads_np`` turn the resulting occupancies
         into the "observed" ``mapped_reads`` array.
      4. ``mh`` is run from the initial state with ``complete_log_likelihood``
         as the log target and ``complete_rprop`` as the proposal.

    Relies on module-level names not passed as arguments: ``q``, ``w``,
    ``TRUE_ENERGY_MATRIX``, ``MEAN_FRAGMENT_LENGTH``, ``NUM_CELLS_ORIGINAL``,
    ``log_dprop`` and ``capture_state``.

    Args:
        G: genome length passed to ``random_site`` and ``map_reads_np``.
        iterations: forwarded to ``mh`` as ``iterations``.
        init_matrix: starting energy matrix; when ``None`` a fresh one is
            drawn with ``random_energy_matrix(w)``.
        init_mu: starting ``mu``; when ``None`` it defaults to ``-20``.
        verbose: forwarded to ``mh`` as ``verbose``.

    Returns:
        Tuple ``(matrix_chain, genome, mapped_reads)`` where ``matrix_chain``
        is whatever ``mh`` returns.
    """
    # Single-argument print(...) with %-formatting emits the same text under
    # the Python 2 print statement semantics and is valid Python 3 syntax.
    print("generating genome")
    genome = random_site(G)
    print("generating eps")
    eps = score_genome_np(TRUE_ENERGY_MATRIX, genome)

    min_mu, max_mu = -40, 0

    def copy_number_error(trial_mu):
        # Difference between the summed occupancy at trial_mu and the
        # module-level target q.
        return np.sum(fd_solve_np(eps, trial_mu)) - q

    # NOTE(review): verbose is hard-coded to True here and does not follow the
    # `verbose` argument of main -- confirm this is intended.
    mu = bisect_interval(copy_number_error, min_mu, max_mu,
                         verbose=True, tolerance=1e-1)

    print("computing ps")
    true_ps = fd_solve_np(eps, mu)
    print("true q: %s" % (np.sum(true_ps),))

    print("generating chip dataset")
    chip_fragments = chip_ps_np(true_ps, MEAN_FRAGMENT_LENGTH, NUM_CELLS_ORIGINAL)
    mapped_reads = np.array(map_reads_np(chip_fragments, G))
    print("finished chip dataset")

    if init_matrix is None:
        init_matrix = random_energy_matrix(w)
    if init_mu is None:
        init_mu = -20
    init_scores = score_genome_np(init_matrix, genome)
    # State layout used throughout: ((matrix, mu), per-site scores).
    init_state = ((init_matrix, init_mu), init_scores)

    def logf(state):
        return complete_log_likelihood(state, mapped_reads)

    def rprop(state):
        return complete_rprop(state, genome)

    print("true mu: %s" % (mu,))
    # Evaluate before printing: complete_log_likelihood prints its own debug
    # output, which would otherwise be interleaved with this label.
    true_log_likelihood = logf(((TRUE_ENERGY_MATRIX, mu), eps))
    print("true log_likelihood: %s" % (true_log_likelihood,))

    print("hitting mh loop")
    matrix_chain = mh(logf,
                      proposal=rprop,
                      x0=init_state,
                      dprop=log_dprop,
                      capture_state=capture_state,
                      verbose=verbose,
                      use_log=True,
                      iterations=iterations,
                      modulus=100)
    return matrix_chain, genome, mapped_reads
def capture_state(state):
    """Return the ``(matrix, mu)`` part of a state tuple.

    Args:
        state: 2-tuple ``(mat_and_mu, site_scores)``.

    Returns:
        The first element, ``mat_and_mu``; the site scores are discarded.
    """
    # Explicit unpacking replaces the Python 2-only tuple parameter; it still
    # raises if `state` is not a 2-element iterable.
    mat_and_mu, _site_scores = state
    return mat_and_mu


def complete_log_likelihood(state, mapped_reads, num_cells=NUM_CELLS_RECOVERED):
    """Compute log likelihood of matrix, given chip seq data.

    Args:
        state: nested tuple ``((matrix, mu), eps)``. ``eps`` and ``mu`` are
            fed to ``fd_solve_np``; ``matrix`` is only pretty-printed here.
        mapped_reads: observed read data, passed unchanged to
            ``chip_seq_log_likelihood``.
        num_cells: cell count given to ``chip_ps_np`` and used as the
            denominator base of the pseudocounted coverage estimate.

    Returns:
        The value returned by ``chip_seq_log_likelihood`` for the predicted
        coverage probabilities, ``mapped_reads`` and ``NUM_CELLS_ORIGINAL``.

    Side effects:
        Prints progress and diagnostic lines (and pretty-prints ``matrix``)
        on every call.
    """
    (matrix, mu), eps = state
    print("entering complete log likelihood")

    # Pad the fd_solve_np output with w - 1 trailing zeros (w is module-level).
    ps = np.append(fd_solve_np(eps, mu), [0] * (w - 1))
    G = len(ps)
    print("mean copy number: %s mu: %s" % (np.sum(ps), mu))

    # XXX HACK: coverage is obtained by running chip_ps_np / map_reads_np on
    # ps; a predict_chip_ps4-based computation was disabled at this point by
    # the original author.
    proposed_reads = map_reads_np(chip_ps_np(ps, MEAN_FRAGMENT_LENGTH, num_cells), G)

    # Add Laplacian pseudocount: one observation of hit and miss each.
    predicted_coverage_probability = (
        (np.array(proposed_reads, dtype=float) + 1) / (num_cells + 2)
    )

    ans = chip_seq_log_likelihood(predicted_coverage_probability,
                                  mapped_reads,
                                  NUM_CELLS_ORIGINAL)

    pprint(matrix)
    print("mu: %s" % (mu,))
    print("log likelihood: %s" % (ans,))
    print("returning from complete log likelihood")
    return ans


def log_likelihood_from_state(matrix,mu,genome,mapped_reads,num_cells=NUM_CELLS_RECOVERED): eps = score_genome_np(matrix,genome)