def _report_ll_change(cur_ll, new_ll):
    """Print the current and probed scores, tagged as improvement or worsening."""
    verdict = "(improvement)" if new_ll > cur_ll else "(worsening)"
    print("cur ll, new_ll: %s %s %s" % (cur_ll, new_ll, verdict))


def gradient_descent_experiment(true_rdm=None, num_reads=100000):
    """Fit an energy matrix and mu to a read-density map by finite differences.

    Starting from random_energy_matrix(w) and mu = -20, every iteration
    probes each matrix entry and mu with a forward difference of `logf`
    (timestamp(complete_log_likelihood(...))), scales each slope by the
    learning rate and applies all steps at once.  Iteration stops as soon
    as an update fails to increase `logf`.

    Args:
        true_rdm: read-density map to fit.  When None, one is simulated
            from a fixed reference matrix ([-2, 0, 0, 0] per position)
            with mu = -20.
        num_reads: number of reads to simulate; also forwarded to
            complete_log_likelihood.

    Returns:
        ((matrix, mu), eps) as they were before the final, non-improving
        update.
    """
    # `genome` is read from module scope; loading it locally through
    # get_ecoli_genome(at_lab=False) is disabled.
    G = len(genome)
    w = 10
    mfl = 250
    lamb = 1.0 / mfl

    simulating_data = true_rdm is None
    if simulating_data:
        true_matrix = [[-2, 0, 0, 0] for _ in range(w)]
        true_mu = -20
        true_eps = score_genome_np(true_matrix, genome)
        true_ps = fd_solve_np(true_eps, true_mu)
        true_reads = reads_from_ps(true_ps, mfl, min_seq_len=75,
                                   num_reads=num_reads)
        true_rdm = density_from_reads(true_reads, G)
        true_state = ((true_matrix, true_mu), true_eps)

    # Must be defined before its first use below and after true_rdm has been
    # resolved: it was previously bound only after `true_ll` was computed,
    # which raised UnboundLocalError whenever data was simulated.
    def logf(state):
        return timestamp(complete_log_likelihood(state, true_rdm, lamb,
                                                 num_reads=num_reads))

    true_ll = logf(true_state) if simulating_data else None

    matrix = random_energy_matrix(w)
    mu = -20
    eps = score_genome_np(matrix, genome)
    init_state = ((matrix, mu), eps)

    dw = 0.1   # forward-difference step for a matrix entry
    dmu = 0.1  # forward-difference step for mu
    old_ll = 0
    print("true_ll: %s" % (true_ll,))
    cur_ll = logf(init_state)
    eta = 10**-7  # learning rate
    iterations = 0

    # The first pass always runs; afterwards continue only while the last
    # update improved the score.
    while cur_ll > old_ll or iterations == 0:
        old_ll = cur_ll
        dmat = [[0] * 4 for _ in range(w)]
        fwd_eps, rev_eps = eps  # unchanged for every probe of this iteration

        # Probe each matrix entry separately.
        for i in range(w):
            for j in range(4):
                print("i, j: %s %s" % (i, j))
                new_mat = [row[:] for row in matrix]
                new_mat[i][j] += dw
                new_eps = update_scores_np(fwd_eps, rev_eps, i, j, dw, w,
                                           genome)
                new_ll = logf(((new_mat, mu), new_eps))
                _report_ll_change(cur_ll, new_ll)
                delta_w = (new_ll - cur_ll) / dw * eta
                print("delta_w: %s" % (delta_w,))
                dmat[i][j] = delta_w

        # Probe mu; the scores are reused unchanged for this probe.
        new_mu = mu + dmu
        new_ll = logf(((matrix, new_mu), eps))
        print("mu:")
        _report_ll_change(cur_ll, new_ll)
        delta_mu = (new_ll - cur_ll) / dmu * eta
        print("delta_mu: %s" % (delta_mu,))

        # Remember the pre-update state, then apply every step at once.
        old_matrix = [row[:] for row in matrix]
        for row, steps in zip(matrix, dmat):
            for j, step in enumerate(steps):
                row[j] += step
        # NOTE(review): np.array(eps) snapshots the scores as one array, so
        # the returned eps is not the (fwd, rev) pair itself -- confirm that
        # callers accept this.
        old_eps = np.array(eps)
        eps = score_genome_np(matrix, genome)
        old_mu = mu
        mu += delta_mu
        cur_ll = logf(((matrix, mu), eps))

        print("\nresults of iteration %s:" % iterations)
        pprint(matrix)
        print(mu)
        print("likelihood: %s -> %s" % (old_ll, cur_ll))
        iterations += 1

    return ((old_matrix, old_mu), old_eps)
MU_SIGMA = 0.1 def complete_rprop(((mat,mu),(fwd_eps,rev_eps)),genome): """Propose a new matrix and new mu, given mat,mu. Return updated scores for convenience""" pprint(mat) print "mu:",mu w = len(mat) new_mat = [row[:] for row in mat] # make a copy of the matrix new_mu = mu if random.random() < 0.5: # flip a coin and update weight matrix or mu altered_col = random.randrange(w) # pick a column to alter altered_row = random.randrange(4) # pick a row to alter dw = random.gauss(0,MAT_SIGMA) # add N(0,2) noise new_mat[altered_col][altered_row] += dw new_fwd_eps,new_rev_eps = update_scores_np(fwd_eps,rev_eps,altered_col,altered_row,dw,w,genome) else: new_mu += random.gauss(0,MU_SIGMA) new_fwd_eps,new_rev_eps = fwd_eps,rev_eps # careful about returning copy...? return ((new_mat,new_mu),(new_fwd_eps,new_rev_eps)) def log_dprop(((matp,mup),epsp),((mat,mu),eps)): dmat = sum([xp - x for (rowp,row) in zip(matp,mat) for (xp,x) in zip(rowp,row)]) dmu = mup - mu if dmat != 0: return log(1/2.0 * dnorm(dmat,0,MAT_SIGMA)) else: return log(1/2.0 * dnorm(dmu,0,MAT_SIGMA)) #return log(dnorm(dmat,0,MAT_SIGMA)) + log(dnorm(dmu,0,MU_SIGMA)) def capture_state((mat_and_mu,site_scores)):