def __call__(self, X):
    """
    Evaluate the objective function at a point in parameter space.
    A point with a negative value among its first three entries
    is assigned an infinite objective value.
    @param X: six params defining mutation and selection
    @return: negative log likelihood
    """
    # The number of alleles is hardcoded.
    nalleles = 4
    # Unpacking requires exactly six entries.
    param_values = X.tolist()
    theta, ka, kb, g0, g1, g2 = param_values
    # Reject the point before building the transition matrix.
    if theta < 0 or ka < 0 or kb < 0:
        return float('inf')
    mutation, fitnesses = kaizeng.params_to_mutation_fitness(
            self.N, param_values)
    # The stationary distribution of the transition matrix is scored
    # against the observed counts using the multinomial log pmf.
    transition_matrix = kaizeng.get_transition_matrix(
            self.N, nalleles, mutation, fitnesses)
    stationary = MatrixUtil.get_stationary_distribution(transition_matrix)
    log_likelihood = StatsUtil.multinomial_log_pmf(
            stationary, self.observed_counts)
    return -log_likelihood
def get_response_content(fs):
    """
    Time the steps of the likelihood computation and fit simulated data.
    A timing diagnostic is printed to stdout after each step,
    and then the parameters are estimated from freshly sampled counts.
    @param fs: user input (not referenced by this function)
    @return: the text of the array of parameter estimates
    """
    def report(label, start_time):
        # Print one diagnostic line with the seconds elapsed since the start.
        print(' '.join([label, str(time.time() - start_time)]))

    np.set_printoptions(linewidth=200)
    out = StringIO()
    nsamples = 1
    # Define the fixed simulation settings.
    nsites = 50000
    N = 15 * 2
    k = 4
    params = (0.002, 1, 1, 0, 0, 0)
    mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params)
    # Time the construction of the transition matrix.
    tm = time.time()
    P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses)
    report('time to construct transition matrix:', tm)
    # Time the computation of the stationary distribution.
    tm = time.time()
    v = MatrixUtil.get_stationary_distribution(P)
    report('time to get stationary distribution:', tm)
    # Time the sampling of the multinomial counts.
    tm = time.time()
    timed_counts = np.random.multinomial(nsites, v)
    report('time to sample multinomial counts:', tm)
    # Time the log pmf; its value is computed only for this measurement.
    tm = time.time()
    StatsUtil.multinomial_log_pmf(v, timed_counts)
    report('time to get multinomial log pmf:', tm)
    # Each estimate starts the search at the parameters
    # that were used to simulate the counts.
    estimates = [
            optimize.fmin(
                G(N, np.random.multinomial(nsites, v)), np.array(params))
            for _ in range(nsamples)]
    out.write(str(np.array(estimates)))
    out.write('\n')
    return out.getvalue()
def get_response_content(fs):
    """
    Time the steps of the likelihood computation and fit simulated data.
    A timing diagnostic is printed to stdout after each step,
    and then the parameters are estimated from freshly sampled counts.
    @param fs: user input (not referenced by this function)
    @return: the text of the array of parameter estimates
    """
    def report(label, start_time):
        # Print one diagnostic line with the seconds elapsed since the start.
        print(' '.join([label, str(time.time() - start_time)]))

    np.set_printoptions(linewidth=200)
    out = StringIO()
    nsamples = 1
    # Define the fixed simulation settings.
    nsites = 50000
    N = 15 * 2
    k = 4
    params = (0.002, 1, 1, 0, 0, 0)
    mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params)
    # Time the construction of the transition matrix.
    tm = time.time()
    P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses)
    report('time to construct transition matrix:', tm)
    # Time the computation of the stationary distribution.
    tm = time.time()
    v = MatrixUtil.get_stationary_distribution(P)
    report('time to get stationary distribution:', tm)
    # Time the sampling of the multinomial counts.
    tm = time.time()
    timed_counts = np.random.multinomial(nsites, v)
    report('time to sample multinomial counts:', tm)
    # Time the log pmf; its value is computed only for this measurement.
    tm = time.time()
    StatsUtil.multinomial_log_pmf(v, timed_counts)
    report('time to get multinomial log pmf:', tm)
    # Each estimate starts the search at the parameters
    # that were used to simulate the counts.
    estimates = [
            optimize.fmin(
                G(N, np.random.multinomial(nsites, v)), np.array(params))
            for _ in range(nsamples)]
    out.write(str(np.array(estimates)))
    out.write('\n')
    return out.getvalue()
def get_response_content(fs):
    """
    Plot sampled frequency distributions of allele 1 under several models.
    Five distributions are compared in a grouped barplot drawn by R,
    in this order: a two-allele large population approximation,
    a two-allele distribution, the four-allele stationary distributions
    without and with mutational bias, and a four-allele large population
    approximation with mutational bias.
    @param fs: user input providing N_big_diploid, with_replacement,
    without_replacement, gamma_0, gamma_1, gamma_2, and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if the haploid population is smaller than the sample
    or if neither subsampling option is selected
    @raise RUtil.RError: if the R plotter gives a nonzero return code
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    gamma = fs.gamma_1
    # The second parameter tuple differs from the first
    # only in its second entry (2 instead of 1).
    params_list = [
            (0.008, 1, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2),
            (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)]
    # Accumulate the stationary probability by the count of allele 1,
    # using only the states in which alleles 1 and 2 are both present.
    allele_histograms = np.zeros((2, N_big_haploid + 1))
    for i, params in enumerate(params_list):
        mutation, selection = kaizeng.params_to_mutation_fitness(
                N_big_haploid, params)
        # NOTE(review): the transition matrix gets the diploid size while
        # the states are enumerated with the haploid size -- presumably
        # get_transition_matrix doubles it internally; confirm.
        P = kaizeng.get_transition_matrix(
                N_big_diploid, k, mutation, selection)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(kaizeng.gen_states(
                N_big_haploid, k)):
            if counts[0] and counts[1]:
                allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # Each column corresponds to an allele count in the small sample.
    # There are five rows each corresponding to a configuration.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # I'm not sure if I am using the right equation.
    arr.append(
            diallelic_approximation(N_small, fs.gamma_0, fs.gamma_1).tolist())
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    # Force true division so that an integer gamma
    # is not floor-divided under python 2.
    f1 = 1.0 - gamma / float(N_big_haploid)
    h = get_two_allele_distribution(
            N_big_haploid, N_small, f0, f1, f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations.
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    # Only the mutation part of the converted parameters is needed.
    mutation = kaizeng.params_to_mutation_fitness(
            N_big_haploid, params_list[1])[0]
    gammas = np.array([fs.gamma_0, fs.gamma_1, fs.gamma_2, 0])
    h = kaizeng.get_large_population_approximation(
            N_small, k, gammas, mutation)
    arr.append(h.tolist())
    # Define the r script.
    title_line = 'title.string <- "allele 1 vs allele 2"'
    mdat_line = ' '.join(['mdat <-', RUtil.matrix_to_R_string(arr)])
    # The bar labels are the sample counts 1, ..., N_small - 1.
    names_arg = 'names.arg = c(%s)' % ','.join(
            str(count) for count in range(1, N_small))
    barplot_line = mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + mk_call_str(
                'c',
                '"two-allele large N limit"',
                '"two-allele"',
                '"four-allele without mutational bias"',
                '"four-allele with mutational bias (kappa_{1,2}=2)"',
                '"four-allele with mutational bias, large N limit"',
                ),
            'args.legend = list(x="topleft", bty="n")',
            names_arg,
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=mk_call_str(
                'c',
                '"red"',
                '"white"',
                '"black"',
                '"gray"',
                '"blue"',
                ),
            beside='TRUE',
            )
    script = '\n'.join([title_line, mdat_line, barplot_line]).rstrip()
    # Create the R plot image.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot sampled frequency distributions of allele 1 under several models.
    Five distributions are compared in a grouped barplot drawn by R,
    in this order: a two-allele large population approximation,
    a two-allele distribution, the four-allele stationary distributions
    without and with mutational bias, and a four-allele large population
    approximation with mutational bias.
    @param fs: user input providing N_big_diploid, with_replacement,
    without_replacement, gamma_0, gamma_1, gamma_2, and imageformat
    @return: the image data produced by the R plotter
    @raise ValueError: if the haploid population is smaller than the sample
    or if neither subsampling option is selected
    @raise RUtil.RError: if the R plotter gives a nonzero return code
    """
    N_small = 10
    N_big_diploid = fs.N_big_diploid
    N_big_haploid = N_big_diploid * 2
    if N_big_haploid < N_small:
        raise ValueError('use a larger diploid population size')
    if fs.with_replacement:
        f_subsample = StatsUtil.subsample_pmf_with_replacement
    elif fs.without_replacement:
        f_subsample = StatsUtil.subsample_pmf_without_replacement
    else:
        raise ValueError('subsampling option error')
    k = 4
    gamma = fs.gamma_1
    # The second parameter tuple differs from the first
    # only in its second entry (2 instead of 1).
    params_list = [
            (0.008, 1, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2),
            (0.008, 2, 1, fs.gamma_0, fs.gamma_1, fs.gamma_2)]
    # Accumulate the stationary probability by the count of allele 1,
    # using only the states in which alleles 1 and 2 are both present.
    allele_histograms = np.zeros((2, N_big_haploid + 1))
    for i, params in enumerate(params_list):
        mutation, selection = kaizeng.params_to_mutation_fitness(
                N_big_haploid, params)
        # NOTE(review): the transition matrix gets the diploid size while
        # the states are enumerated with the haploid size -- presumably
        # get_transition_matrix doubles it internally; confirm.
        P = kaizeng.get_transition_matrix(
                N_big_diploid, k, mutation, selection)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, counts in enumerate(kaizeng.gen_states(
                N_big_haploid, k)):
            if counts[0] and counts[1]:
                allele_histograms[i, counts[0]] += v[state_index]
    # Define the r table.
    # Each column corresponds to an allele count in the small sample.
    # There are five rows each corresponding to a configuration.
    arr = []
    # Use the two allele approximation
    # from mcvean and charlesworth 1999 referred to by zeng 2011.
    # I'm not sure if I am using the right equation.
    arr.append(
            diallelic_approximation(N_small, fs.gamma_0, fs.gamma_1).tolist())
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    # Force true division so that an integer gamma
    # is not floor-divided under python 2.
    f1 = 1.0 - gamma / float(N_big_haploid)
    h = get_two_allele_distribution(
            N_big_haploid, N_small, f0, f1, f_subsample)
    arr.append(h.tolist())
    # Get frequencies for the other two configurations.
    for hist in allele_histograms:
        # Get probabilities conditional on dimorphism.
        hist[0] = 0
        hist[-1] = 0
        hist /= np.sum(hist)
        # Get the subsampled pmf.
        distn = f_subsample(hist, N_small)
        MatrixUtil.assert_distribution(distn)
        # Get probabilities conditional on dimorphism of the sample.
        distn[0] = 0
        distn[-1] = 0
        distn /= np.sum(distn)
        # Add to the table of densities.
        arr.append(distn[1:-1].tolist())
    # Get a large population approximation
    # when there is mutational bias.
    # Only the mutation part of the converted parameters is needed.
    mutation = kaizeng.params_to_mutation_fitness(
            N_big_haploid, params_list[1])[0]
    gammas = np.array([fs.gamma_0, fs.gamma_1, fs.gamma_2, 0])
    h = kaizeng.get_large_population_approximation(
            N_small, k, gammas, mutation)
    arr.append(h.tolist())
    # Define the r script.
    title_line = 'title.string <- "allele 1 vs allele 2"'
    mdat_line = ' '.join(['mdat <-', RUtil.matrix_to_R_string(arr)])
    # The bar labels are the sample counts 1, ..., N_small - 1.
    names_arg = 'names.arg = c(%s)' % ','.join(
            str(count) for count in range(1, N_small))
    barplot_line = mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + mk_call_str(
                'c',
                '"two-allele large N limit"',
                '"two-allele"',
                '"four-allele without mutational bias"',
                '"four-allele with mutational bias (kappa_{1,2}=2)"',
                '"four-allele with mutational bias, large N limit"',
                ),
            'args.legend = list(x="topleft", bty="n")',
            names_arg,
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=mk_call_str(
                'c',
                '"red"',
                '"white"',
                '"black"',
                '"gray"',
                '"blue"',
                ),
            beside='TRUE',
            )
    script = '\n'.join([title_line, mdat_line, barplot_line]).rstrip()
    # Create the R plot image.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
return out.getvalue() if __name__ == '__main__': k = 4 nsamples = 100 settings_list = [ [15, 50000, [0.002, 1, 1, 0, 0, 0]], [15, 50000, [0.002, 1, 2, 0.4, -1.2, 2]], [10, 10000, [0.008, 1, 1, 0.5, 1, 1.5]], [6, 5000, [0.01, 1, 2, 0, 0, 0]]] for N_diploid, nsites, params in settings_list: N = 2*N_diploid print 'diploid population size = %s, sequence length = %s' % ( N_diploid, nsites) print '\t'.join(str(x) for x in ['Input'] + params) mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params) P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses) v = MatrixUtil.get_stationary_distribution(P) arr = [] for i in range(nsamples): counts = np.random.multinomial(nsites, v) X0 = np.array(params) g = G(N, counts) Xopt = optimize.fmin(g, X0) arr.append(Xopt.tolist()) means = [] cis = [] for mles in zip(*arr): means.append(np.mean(mles)) x = sorted(mles) cis.append([x[2], x[-2]])
return out.getvalue() if __name__ == '__main__': k = 4 nsamples = 100 settings_list = [[15, 50000, [0.002, 1, 1, 0, 0, 0]], [15, 50000, [0.002, 1, 2, 0.4, -1.2, 2]], [10, 10000, [0.008, 1, 1, 0.5, 1, 1.5]], [6, 5000, [0.01, 1, 2, 0, 0, 0]]] for N_diploid, nsites, params in settings_list: N = 2 * N_diploid print 'diploid population size = %s, sequence length = %s' % ( N_diploid, nsites) print '\t'.join(str(x) for x in ['Input'] + params) mutation, fitnesses = kaizeng.params_to_mutation_fitness(N, params) P = kaizeng.get_transition_matrix(N, k, mutation, fitnesses) v = MatrixUtil.get_stationary_distribution(P) arr = [] for i in range(nsamples): counts = np.random.multinomial(nsites, v) X0 = np.array(params) g = G(N, counts) Xopt = optimize.fmin(g, X0) arr.append(Xopt.tolist()) means = [] cis = [] for mles in zip(*arr): means.append(np.mean(mles)) x = sorted(mles) cis.append([x[2], x[-2]])
def get_response_content(fs):
    """
    Plot frequency distributions of allele 1 for a small fixed population.
    Three distributions are compared in a grouped barplot drawn by R,
    in this order: a two-allele distribution and the four-allele
    stationary distributions without and with mutational bias.
    @param fs: user input; only its imageformat is read here
    @return: the image data produced by the R plotter
    """
    N_diploid = 5
    N_haploid = N_diploid * 2
    k = 4
    gamma = 1.5
    # The second parameter tuple differs from the first
    # only in its second entry (2 instead of 1).
    unbiased_params = (0.008, 1, 1, 0, gamma, 1)
    biased_params = (0.008, 2, 1, 0, gamma, 1)
    # Each row accumulates stationary probability by the count of allele 1,
    # using only the states in which alleles 1 and 2 are both present.
    allele_histograms = np.zeros((2, N_haploid + 1))
    for row, params in zip(allele_histograms, (unbiased_params, biased_params)):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
                N_haploid, params)
        P = kaizeng.get_transition_matrix(
                N_diploid, k, mutation, fitnesses)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, state in enumerate(
                kaizeng.gen_states(N_haploid, k)):
            if not (state[0] and state[1]):
                continue
            row[state[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are three rows each corresponding to a configuration.
    table_rows = []
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    f1 = 1.0 - gamma / N_haploid
    two_allele = get_two_allele_distribution(N_diploid, f0, f1)
    table_rows.append(two_allele.tolist())
    # The two allele approximation from mcvean and charlesworth 1999
    # referred to by zeng 2011 is disabled, so it adds no row here.
    # Normalize the interior of each four-allele histogram.
    for row in allele_histograms:
        interior = row[1:-1]
        table_rows.append((interior / np.sum(interior)).tolist())
    # Define the r script.
    title_line = 'title.string <- "allele 1 vs allele 2, gamma = 1.5"'
    mdat_line = ' '.join(['mdat <-', RUtil.matrix_to_R_string(table_rows)])
    legend_text = mk_call_str(
            'c',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias kappa_{1,2}=2"',
            )
    bar_colors = mk_call_str(
            'c',
            '"white"',
            '"black"',
            '"gray"',
            )
    barplot_line = mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + legend_text,
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=bar_colors,
            beside='TRUE',
            )
    script = '\n'.join([title_line, mdat_line, barplot_line]).rstrip()
    # Create the R plot image.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Plot frequency distributions of allele 1 for a small fixed population.
    Three distributions are compared in a grouped barplot drawn by R,
    in this order: a two-allele distribution and the four-allele
    stationary distributions without and with mutational bias.
    @param fs: user input; only its imageformat is read here
    @return: the image data produced by the R plotter
    """
    N_diploid = 5
    N_haploid = N_diploid * 2
    k = 4
    gamma = 1.5
    # The second parameter tuple differs from the first
    # only in its second entry (2 instead of 1).
    unbiased_params = (0.008, 1, 1, 0, gamma, 1)
    biased_params = (0.008, 2, 1, 0, gamma, 1)
    # Each row accumulates stationary probability by the count of allele 1,
    # using only the states in which alleles 1 and 2 are both present.
    allele_histograms = np.zeros((2, N_haploid + 1))
    for row, params in zip(allele_histograms, (unbiased_params, biased_params)):
        mutation, fitnesses = kaizeng.params_to_mutation_fitness(
                N_haploid, params)
        P = kaizeng.get_transition_matrix(
                N_diploid, k, mutation, fitnesses)
        v = MatrixUtil.get_stationary_distribution(P)
        for state_index, state in enumerate(
                kaizeng.gen_states(N_haploid, k)):
            if not (state[0] and state[1]):
                continue
            row[state[0]] += v[state_index]
    # Define the r table.
    # There are nine columns each corresponding to an allele frequency.
    # There are three rows each corresponding to a configuration.
    table_rows = []
    # Use the exact two allele distribution.
    # Well, it is exact if I understand the right scaling
    # of the population size and fitnesses.
    f0 = 1.0
    f1 = 1.0 - gamma / N_haploid
    two_allele = get_two_allele_distribution(N_diploid, f0, f1)
    table_rows.append(two_allele.tolist())
    # The two allele approximation from mcvean and charlesworth 1999
    # referred to by zeng 2011 is disabled, so it adds no row here.
    # Normalize the interior of each four-allele histogram.
    for row in allele_histograms:
        interior = row[1:-1]
        table_rows.append((interior / np.sum(interior)).tolist())
    # Define the r script.
    title_line = 'title.string <- "allele 1 vs allele 2, gamma = 1.5"'
    mdat_line = ' '.join(['mdat <-', RUtil.matrix_to_R_string(table_rows)])
    legend_text = mk_call_str(
            'c',
            '"two-allele"',
            '"four-allele without mutational bias"',
            '"four-allele with mutational bias kappa_{1,2}=2"',
            )
    bar_colors = mk_call_str(
            'c',
            '"white"',
            '"black"',
            '"gray"',
            )
    barplot_line = mk_call_str(
            'barplot',
            'mdat',
            'legend.text=' + legend_text,
            'args.legend = list(x="topleft", bty="n")',
            'names.arg = c(1,2,3,4,5,6,7,8,9)',
            main='title.string',
            xlab='"frequency of allele 1"',
            ylab='"frequency"',
            col=bar_colors,
            beside='TRUE',
            )
    script = '\n'.join([title_line, mdat_line, barplot_line]).rstrip()
    # Create the R plot image.
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter_no_table(
            script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data