def _object_func(params, data, model_func, sel_dist, theta,
                 lower_bound=None, upper_bound=None, verbose=0,
                 multinom=False, flush_delay=0, func_args=[], func_kwargs={},
                 fixed_params=None, ll_scale=1, output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function handed to the optimizer: negative, scaled
    log-likelihood of the data given the model.

    params: Parameter values currently proposed by the optimizer.
    data: Observed spectrum. Its sample_sizes attribute is passed to
          model_func.
    model_func: Called as
                model_func(params_up, ns, sel_dist, theta, *func_args,
                           **func_kwargs)
                and expected to return the model spectrum.
    sel_dist, theta: Forwarded to model_func unchanged.
    lower_bound, upper_bound: Optional per-parameter limits. A None entry
                              means "no limit" for that parameter.
    verbose: If > 0, write a progress line every `verbose` calls.
    multinom: If True use Inference.ll_multinom, otherwise Inference.ll.
    flush_delay: Passed to Misc.delayed_flush after each progress line.
    func_args, func_kwargs: Extra positional / keyword arguments for
                            model_func. Neither is modified here.
    fixed_params: Passed to Inference._project_params_up together with
                  params to build the full parameter vector.
    ll_scale: The returned value is divided by this factor.
    output_stream: Destination of the progress lines.
    store_thetas: If True, record optimal_sfs_scaling(sfs, data) in the
                  module-level _theta_store, keyed by tuple(params).

    Returns -result / ll_scale. If any parameter violates a bound, returns
    -_out_of_bounds_val / ll_scale without evaluating the model. A NaN
    likelihood is replaced by _out_of_bounds_val.
    """
    global _counter, _theta_store
    _counter += 1

    # Expand the optimizer's vector to the full parameter vector.
    full_params = Inference._project_params_up(params, fixed_params)

    # Reject out-of-range proposals before paying for a model evaluation.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params, lower_bound)):
        return -_out_of_bounds_val / ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params, upper_bound)):
        return -_out_of_bounds_val / ll_scale

    model_args = [full_params, data.sample_sizes, sel_dist, theta]
    model_args += list(func_args)
    sfs = model_func(*model_args, **func_kwargs)

    likelihood = Inference.ll_multinom if multinom else Inference.ll
    result = likelihood(sfs, data)

    if store_thetas:
        _theta_store[tuple(params)] = optimal_sfs_scaling(sfs, data)

    # A NaN likelihood is treated like an out-of-bounds evaluation.
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join('%- 12g' % value for value in full_params)
        param_str = 'array([%s])' % formatted
        output_stream.write('%-8i, %-12g, %s%s'
                            % (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
def _object_func(params, data, model_func, pts,
                 lower_bound=None, upper_bound=None, verbose=0,
                 multinom=True, flush_delay=0, func_args=[], func_kwargs={},
                 fixed_params=None, ll_scale=1, output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function handed to the optimizer: negative, scaled
    log-likelihood of the data given the model.

    params: Parameter values currently proposed by the optimizer.
    data: Observed spectrum. Its sample_sizes attribute is passed to
          model_func.
    model_func: Called as
                model_func(params_up, ns, *func_args, pts=pts,
                           **func_kwargs)
                and expected to return the model spectrum.
    pts: Passed to model_func through the 'pts' keyword.
    lower_bound, upper_bound: Optional per-parameter limits. A None entry
                              means "no limit" for that parameter.
    verbose: If > 0, write a progress line every `verbose` calls.
    multinom: If True use ll_multinom, otherwise ll.
    flush_delay: Passed to Misc.delayed_flush after each progress line.
    func_args, func_kwargs: Extra positional / keyword arguments for
                            model_func. The caller's func_kwargs is copied,
                            never modified.
    fixed_params: Passed to _project_params_up together with params to
                  build the full parameter vector.
    ll_scale: The returned value is divided by this factor.
    output_stream: Destination of the progress lines.
    store_thetas: If True, record optimal_sfs_scaling(sfs, data) in the
                  module-level _theta_store, keyed by tuple(params).

    Returns -result / ll_scale. If any parameter violates a bound, returns
    -_out_of_bounds_val / ll_scale without evaluating the model. A NaN
    likelihood is replaced by _out_of_bounds_val.
    """
    global _counter, _theta_store
    _counter += 1

    # Expand the optimizer's vector to the full parameter vector.
    full_params = _project_params_up(params, fixed_params)

    # Reject out-of-range proposals before paying for a model evaluation.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params, lower_bound)):
        return -_out_of_bounds_val/ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params, upper_bound)):
        return -_out_of_bounds_val/ll_scale

    model_args = [full_params, data.sample_sizes] + list(func_args)
    # Add 'pts' on a private copy so the passed-in dict stays untouched.
    call_kwargs = func_kwargs.copy()
    call_kwargs['pts'] = pts
    sfs = model_func(*model_args, **call_kwargs)

    likelihood = ll_multinom if multinom else ll
    result = likelihood(sfs, data)

    if store_thetas:
        _theta_store[tuple(params)] = optimal_sfs_scaling(sfs, data)

    # A NaN likelihood is treated like an out-of-bounds evaluation.
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join('%- 12g' % value for value in full_params)
        param_str = 'array([%s])' % formatted
        output_stream.write('%-8i, %-12g, %s%s'
                            % (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result/ll_scale
#!/usr/bin/env python
# For one population, build a folded spectrum at every projection size in a
# range and print "<projection> <data.S()>" for each.
# NOTE(review): Spectrum.S() is presumably the number of segregating sites;
# confirm against the dadi documentation.
#
# Command-line arguments:
#   1: path of the SNP file read by Misc.make_data_dict
#   2: population id (a single population)
#   3: first projection size to try (inclusive)
#   4: last projection size to try (exclusive, as in range())
import dadi
import pylab
import matplotlib.pyplot as plt
import numpy as np
from numpy import array
from dadi import Misc, Spectrum, Numerics, PhiManip, Integration, Demographics1D, Demographics2D
import sys

# Fail with a usage message rather than a bare IndexError.
if len(sys.argv) < 5:
    sys.exit('usage: %s INFILE POP_ID PROJ_START PROJ_STOP' % sys.argv[0])

infile = sys.argv[1]
popid = [sys.argv[2]]
proj = range(int(sys.argv[3]), int(sys.argv[4]))

dd = Misc.make_data_dict(infile)

for n_proj in proj:
    data = Spectrum.from_data_dict(dd, pop_ids=popid, projections=[n_proj],
                                   polarized=False)
    # A single pre-formatted argument prints the same text under Python 2
    # and Python 3 (the original used the Python 2 print statement).
    print('%s %s' % (n_proj, data.S()))
def four_pops(phi, xx, T, nu1=1, nu2=1, nu3=1, nu4=1,
              m12=0, m13=0, m14=0, m21=0, m23=0, m24=0,
              m31=0, m32=0, m34=0, m41=0, m42=0, m43=0,
              gamma1=0, gamma2=0, gamma3=0, gamma4=0,
              h1=0.5, h2=0.5, h3=0.5, h4=0.5,
              theta0=1, initial_t=0,
              frozen1=False, frozen2=False, frozen3=False, frozen4=False,
              enable_cuda_const=False):
    """
    Integrate a 4-dimensional phi forward.

    phi: Initial 4-dimensional phi. It is copied, so the caller's array is
         never modified.
    xx: 1-dimensional grid upon (0,1) over which phi is defined. It is
        assumed that this grid is used in all dimensions.

    nu's, gamma's, h's, m's, and theta0 may be functions of time.
    nu1,nu2,nu3,nu4: Population sizes
    gamma1,gamma2,gamma3,gamma4: Selection coefficients on *all* segregating
                                 alleles
    h1,h2,h3,h4: Dominance coefficients. h = 0.5 corresponds to genic
                 selection.
    m12,m13,m21,m23,m31,m32, ...: Migration rates. Note that m12 is the rate
                                  *into 1 from 2*.
    theta0: Proportional to ancestral size. Typically constant.
    T: Time at which to halt integration
    initial_t: Time at which to start integration. (Note that this only
               matters if one of the demographic parameters is a function
               of time.)
    frozen1,frozen2,frozen3,frozen4: If True, the integration step for the
                                     corresponding population is skipped.
                                     The flags are also forwarded to the
                                     mutation-injection routine. A frozen
                                     population must have zero migration to
                                     and from it.
    enable_cuda_const: Accepted for interface compatibility. This function
                       has no CUDA code path, so the flag is ignored.

    Returns the integrated phi.

    Raises ValueError if T < initial_t, if a frozen population has non-zero
    migration, or if during integration a time, population size, migration
    rate or theta0 is negative or a population size is zero.

    Note: Generalizing to different grids in different phi directions is
    straightforward. The tricky part will be later doing the extrapolation
    correctly.
    """
    # Work on a private copy. Mutation injection below is called for its
    # effect on phi (its return value is discarded), so without the copy
    # the caller's array would be altered.
    phi = phi.copy()

    if T - initial_t == 0:
        return phi
    elif T - initial_t < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    # Each frozen population is checked against every migration rate that
    # touches it. Population 4 is included, so a frozen population 4 with
    # migration is rejected too.
    frozen_and_rates = (
        (frozen1, (m12, m21, m13, m31, m14, m41)),
        (frozen2, (m12, m21, m23, m32, m24, m42)),
        (frozen3, (m13, m31, m23, m32, m34, m43)),
        (frozen4, (m14, m41, m24, m42, m34, m43)),
    )
    for is_frozen, rates in frozen_and_rates:
        if is_frozen and any(rate != 0 for rate in rates):
            raise ValueError('Population cannot be frozen and have non-zero '
                             'migration to or from it.')

    # The same grid is used along every axis.
    aa = zz = yy = xx

    # Wrap every parameter so it can be evaluated as a function of time.
    nu1_f, nu2_f = Misc.ensure_1arg_func(nu1), Misc.ensure_1arg_func(nu2)
    nu3_f, nu4_f = Misc.ensure_1arg_func(nu3), Misc.ensure_1arg_func(nu4)
    gamma1_f = Misc.ensure_1arg_func(gamma1)
    gamma2_f = Misc.ensure_1arg_func(gamma2)
    gamma3_f = Misc.ensure_1arg_func(gamma3)
    gamma4_f = Misc.ensure_1arg_func(gamma4)
    h1_f, h2_f = Misc.ensure_1arg_func(h1), Misc.ensure_1arg_func(h2)
    h3_f, h4_f = Misc.ensure_1arg_func(h3), Misc.ensure_1arg_func(h4)
    m12_f = Misc.ensure_1arg_func(m12)
    m13_f = Misc.ensure_1arg_func(m13)
    m14_f = Misc.ensure_1arg_func(m14)
    m21_f = Misc.ensure_1arg_func(m21)
    m23_f = Misc.ensure_1arg_func(m23)
    m24_f = Misc.ensure_1arg_func(m24)
    m31_f = Misc.ensure_1arg_func(m31)
    m32_f = Misc.ensure_1arg_func(m32)
    m34_f = Misc.ensure_1arg_func(m34)
    m41_f = Misc.ensure_1arg_func(m41)
    m42_f = Misc.ensure_1arg_func(m42)
    m43_f = Misc.ensure_1arg_func(m43)
    theta0_f = Misc.ensure_1arg_func(theta0)

    # Parameter values at the starting time; used to size the first step.
    current_t = initial_t
    nu1, nu2, nu3, nu4 = (nu1_f(current_t), nu2_f(current_t),
                          nu3_f(current_t), nu4_f(current_t))
    gamma1, gamma2, gamma3, gamma4 = (gamma1_f(current_t),
                                      gamma2_f(current_t),
                                      gamma3_f(current_t),
                                      gamma4_f(current_t))
    h1, h2, h3, h4 = (h1_f(current_t), h2_f(current_t),
                      h3_f(current_t), h4_f(current_t))
    m12, m13, m14 = m12_f(current_t), m13_f(current_t), m14_f(current_t)
    m21, m23, m24 = m21_f(current_t), m23_f(current_t), m24_f(current_t)
    m31, m32, m34 = m31_f(current_t), m32_f(current_t), m34_f(current_t)
    m41, m42, m43 = m41_f(current_t), m42_f(current_t), m43_f(current_t)

    dx, dy, dz, da = (numpy.diff(xx), numpy.diff(yy),
                      numpy.diff(zz), numpy.diff(aa))
    while current_t < T:
        # The step size comes from the previous time point's parameters and
        # is the smallest of the per-population suggestions.
        dt = min(_compute_dt(dx, nu1, [m12, m13, m14], gamma1, h1),
                 _compute_dt(dy, nu2, [m21, m23, m24], gamma2, h2),
                 _compute_dt(dz, nu3, [m31, m32, m34], gamma3, h3),
                 _compute_dt(da, nu4, [m41, m42, m43], gamma4, h4))
        # Never step past the requested end time.
        this_dt = min(dt, T - current_t)

        # The integration steps below use the parameters at the *next*
        # time point.
        next_t = current_t + this_dt
        nu1, nu2, nu3, nu4 = (nu1_f(next_t), nu2_f(next_t),
                              nu3_f(next_t), nu4_f(next_t))
        gamma1, gamma2, gamma3, gamma4 = (gamma1_f(next_t), gamma2_f(next_t),
                                          gamma3_f(next_t), gamma4_f(next_t))
        h1, h2, h3, h4 = (h1_f(next_t), h2_f(next_t),
                          h3_f(next_t), h4_f(next_t))
        m12, m13, m14 = m12_f(next_t), m13_f(next_t), m14_f(next_t)
        m21, m23, m24 = m21_f(next_t), m23_f(next_t), m24_f(next_t)
        m31, m32, m34 = m31_f(next_t), m32_f(next_t), m34_f(next_t)
        m41, m42, m43 = m41_f(next_t), m42_f(next_t), m43_f(next_t)
        theta0 = theta0_f(next_t)

        if numpy.any(numpy.less([T, nu1, nu2, nu3, nu4,
                                 m12, m13, m14, m21, m23, m24,
                                 m31, m32, m34, m41, m42, m43,
                                 theta0], 0)):
            raise ValueError('A time, population size, migration rate, or '
                             'theta0 is < 0. Has the model been mis-specified?')
        if numpy.any(numpy.equal([nu1, nu2, nu3, nu4], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_4D(phi, this_dt, xx, yy, zz, aa, theta0,
                             frozen1, frozen2, frozen3, frozen4)
        # One integration step per non-frozen population, axis by axis.
        if not frozen1:
            phi = int_c.implicit_4Dx(phi, xx, yy, zz, aa, nu1, m12, m13, m14,
                                     gamma1, h1, this_dt, use_delj_trick)
        if not frozen2:
            phi = int_c.implicit_4Dy(phi, xx, yy, zz, aa, nu2, m21, m23, m24,
                                     gamma2, h2, this_dt, use_delj_trick)
        if not frozen3:
            phi = int_c.implicit_4Dz(phi, xx, yy, zz, aa, nu3, m31, m32, m34,
                                     gamma3, h3, this_dt, use_delj_trick)
        if not frozen4:
            phi = int_c.implicit_4Da(phi, xx, yy, zz, aa, nu4, m41, m42, m43,
                                     gamma4, h4, this_dt, use_delj_trick)

        current_t = next_t
    return phi
def two_pops(phi, xx, T, nu1=1, nu2=1, m12=0, m21=0, gamma1=0, gamma2=0,
             h1=0.5, h2=0.5, theta0=1, initial_t=0, frozen1=False,
             frozen2=False, nomut1=False, nomut2=False,
             enable_cuda_const=False):
    """
    Integrate a 2-dimensional phi forward.

    phi: Initial 2-dimensional phi. It is copied; the input is not modified.
    xx: 1-dimensional grid upon (0,1) over which phi is defined. It is
        assumed that this grid is used in all dimensions.

    nu's, gamma's, h's, m's, and theta0 may be functions of time.
    nu1,nu2: Population sizes
    gamma1,gamma2: Selection coefficients on *all* segregating alleles
    h1,h2: Dominance coefficients. h = 0.5 corresponds to genic selection.
    m12,m21: Migration rates. Note that m12 is the rate *into 1 from 2*.
    theta0: Proportional to ancestral size. Typically constant.
    T: Time at which to halt integration
    initial_t: Time at which to start integration. (Note that this only
               matters if one of the demographic parameters is a function
               of time.)
    frozen1,frozen2: If True, the corresponding population is "frozen" in
                     time (no new mutations and no drift), so the resulting
                     spectrum will correspond to an ancient DNA sample from
                     that population.
    nomut1,nomut2: If True, no new mutations will be introduced into the
                   given population.
    enable_cuda_const: If True, enable CUDA integration with slower constant
                       parameter method. Likely useful only for
                       benchmarking.

    Raises ValueError if T < initial_t, if either population is frozen while
    a migration rate is non-zero, or if during integration a time,
    population size, migration rate or theta0 is negative or a population
    size is zero.

    Note: Generalizing to different grids in different phi directions is
    straightforward. The tricky part will be later doing the extrapolation
    correctly.
    """
    phi = phi.copy()

    duration = T - initial_t
    if duration == 0:
        return phi
    if duration < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    if frozen1 or frozen2:
        if m12 != 0 or m21 != 0:
            raise ValueError('Population cannot be frozen and have non-zero '
                             'migration to or from it.')

    # With purely scalar parameters the constant-parameter integrator can be
    # used. Constant integration with CUDA turns out to be slower, so with
    # CUDA enabled it is only taken when explicitly requested.
    candidates = (nu1, nu2, m12, m21, gamma1, gamma2, h1, h2, theta0)
    all_scalar = all([numpy.isscalar(value) for value in candidates])
    if all_scalar and (not cuda_enabled or enable_cuda_const):
        return _two_pops_const_params(phi, xx, T, nu1, nu2, m12, m21,
                                      gamma1, gamma2, h1, h2, theta0,
                                      initial_t, frozen1, frozen2,
                                      nomut1, nomut2)

    # The same grid is used along both axes.
    yy = xx

    # Wrap every parameter so it can be evaluated as a function of time.
    nu1_f = Misc.ensure_1arg_func(nu1)
    nu2_f = Misc.ensure_1arg_func(nu2)
    m12_f = Misc.ensure_1arg_func(m12)
    m21_f = Misc.ensure_1arg_func(m21)
    gamma1_f = Misc.ensure_1arg_func(gamma1)
    gamma2_f = Misc.ensure_1arg_func(gamma2)
    h1_f = Misc.ensure_1arg_func(h1)
    h2_f = Misc.ensure_1arg_func(h2)
    theta0_f = Misc.ensure_1arg_func(theta0)

    if cuda_enabled:
        import dadi.cuda
        phi = dadi.cuda.Integration._two_pops_temporal_params(
            phi, xx, T, initial_t, nu1_f, nu2_f, m12_f, m21_f,
            gamma1_f, gamma2_f, h1_f, h2_f, theta0_f,
            frozen1, frozen2, nomut1, nomut2)
        return phi

    def _evaluate(when):
        # Sizes, migration, selection and dominance at time `when`.
        return (nu1_f(when), nu2_f(when), m12_f(when), m21_f(when),
                gamma1_f(when), gamma2_f(when), h1_f(when), h2_f(when))

    current_t = initial_t
    nu1, nu2, m12, m21, gamma1, gamma2, h1, h2 = _evaluate(current_t)

    dx, dy = numpy.diff(xx), numpy.diff(yy)
    while current_t < T:
        # Step size from the previous time point's parameters, capped so the
        # integration never runs past T.
        step_x = _compute_dt(dx, nu1, [m12], gamma1, h1)
        step_y = _compute_dt(dy, nu2, [m21], gamma2, h2)
        this_dt = min(min(step_x, step_y), T - current_t)

        # The implicit steps use the parameters at the *next* time point.
        next_t = current_t + this_dt
        nu1, nu2, m12, m21, gamma1, gamma2, h1, h2 = _evaluate(next_t)
        theta0 = theta0_f(next_t)

        if numpy.any(numpy.less([T, nu1, nu2, m12, m21, theta0], 0)):
            raise ValueError('A time, population size, migration rate, or '
                             'theta0 is < 0. Has the model been mis-specified?')
        if numpy.any(numpy.equal([nu1, nu2], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_2D(phi, this_dt, xx, yy, theta0,
                             frozen1, frozen2, nomut1, nomut2)
        if not frozen1:
            phi = int_c.implicit_2Dx(phi, xx, yy, nu1, m12, gamma1, h1,
                                     this_dt, use_delj_trick)
        if not frozen2:
            phi = int_c.implicit_2Dy(phi, xx, yy, nu2, m21, gamma2, h2,
                                     this_dt, use_delj_trick)

        current_t = next_t
    return phi
def one_pop(phi, xx, T, nu=1, gamma=0, h=0.5, theta0=1.0, initial_t=0,
            frozen=False, beta=1):
    """
    Integrate a 1-dimensional phi forward.

    phi: Initial 1-dimensional phi. It is copied; the input is not modified.
    xx: Grid upon (0,1) over which phi is defined.

    nu, gamma, h, theta0, and beta may be functions of time.
    nu: Population size
    gamma: Selection coefficient on *all* segregating alleles
    h: Dominance coefficient. h = 0.5 corresponds to genic selection.
       Heterozygotes have fitness 1+2sh and homozygotes have fitness 1+2s.
    theta0: Proportional to ancestral size. Typically constant.
    beta: Breeding ratio, beta=Nf/Nm.

    T: Time at which to halt integration
    initial_t: Time at which to start integration. (Note that this only
               matters if one of the demographic parameters is a function
               of time.)
    frozen: If True, population is 'frozen' so that it does not change.
            In the one_pop case, this is equivalent to not running the
            integration at all.

    Raises ValueError if T < initial_t, or if during integration T, nu or
    theta0 is negative or nu is zero.
    """
    phi = phi.copy()

    # Nothing to integrate: a frozen single population does not change, and
    # neither does anything over a zero-length interval.
    if frozen or T - initial_t == 0:
        return phi
    if T - initial_t < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    # Purely scalar parameters go to the constant-parameter integrator.
    candidates = (nu, gamma, h, theta0, beta)
    if all([numpy.isscalar(value) for value in candidates]):
        return _one_pop_const_params(phi, xx, T, nu, gamma, h, theta0,
                                     initial_t, beta)

    # Wrap every parameter so it can be evaluated as a function of time.
    nu_f = Misc.ensure_1arg_func(nu)
    gamma_f = Misc.ensure_1arg_func(gamma)
    h_f = Misc.ensure_1arg_func(h)
    theta0_f = Misc.ensure_1arg_func(theta0)
    beta_f = Misc.ensure_1arg_func(beta)

    current_t = initial_t
    nu = nu_f(current_t)
    gamma = gamma_f(current_t)
    h = h_f(current_t)
    beta = beta_f(current_t)

    dx = numpy.diff(xx)
    while current_t < T:
        # dt is estimated from the previous time point's nu, gamma and h,
        # while the implicit step below needs the *next* time point's
        # parameters -- a small, accepted inconsistency.
        suggested_dt = _compute_dt(dx, nu, [0], gamma, h)
        this_dt = min(suggested_dt, T - current_t)
        next_t = current_t + this_dt

        nu = nu_f(next_t)
        gamma = gamma_f(next_t)
        h = h_f(next_t)
        beta = beta_f(next_t)
        theta0 = theta0_f(next_t)

        if numpy.less([T, nu, theta0], 0).any():
            raise ValueError('A time, population size, migration rate, or '
                             'theta0 is < 0. Has the model been mis-specified?')
        if numpy.equal([nu], 0).any():
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_1D(phi, this_dt, xx, theta0)
        # Each step is done in C, since computing the a,b,c matrices there
        # is faster.
        phi = int_c.implicit_1Dx(phi, xx, nu, gamma, h, beta, this_dt,
                                 use_delj_trick=use_delj_trick)
        current_t = next_t
    return phi
import numpy
import dadi
import Plotting_Functions
from dadi import Numerics, PhiManip, Integration, Misc
from dadi.Spectrum_mod import Spectrum

# ---------------------------------------------------------------------------
# Build the joint site frequency spectrum from a SNPs file.
# The three settings below (input path, population ids, projection sizes)
# are the values a user is expected to edit.
# ---------------------------------------------------------------------------

# Path to the SNPs input file.
snps = "/Users/portik/Documents/GitHub/dadi_pipeline/Two_Population_Pipeline/Example_Data/dadi_2pops_North_South_snps.txt"

# Population ids; the list should match the population headers of the
# columns in the SNPs file.
pop_ids = ["North", "South"]

# Projection sizes, counted in ALLELES, not individuals.
proj = [16, 32]

# Parse the SNPs file into a python dictionary.
dd = Misc.make_data_dict(snps)

# Turn the dictionary into a folded AFS object; polarized=False is what
# requests the folded spectrum.
fs = Spectrum.from_data_dict(dd,
                             pop_ids=pop_ids,
                             projections=proj,
                             polarized=False)
def _object_func(params, data1, data2, cache1, cache2, model_func, sel_dist,
                 scal_fac1, scal_fac2, theta1, theta2,
                 lower_bound=None, upper_bound=None, verbose=0,
                 multinom=False, flush_delay=0, func_args=[], func_kwargs={},
                 fixed_params1=None, fixed_params2=None, ll_scale=1,
                 output_stream=sys.stdout, store_thetas=False):
    """
    Objective function for a joint fit to two data sets: negative, scaled
    sum of the two log-likelihoods.

    params: Parameter values currently proposed by the optimizer. They are
            shared by both data sets.
    data1, data2: Observed spectra. Their sample_sizes attributes are
                  passed to model_func.
    cache1, cache2: Forwarded to model_func for data set 1 / 2.
    model_func: Called once per data set as
                model_func(params_up, ns, sel_dist, theta, cache,
                           *func_args, **func_kwargs).
                func_kwargs is forwarded unchanged (no 'pts' keyword is
                added).
    sel_dist: Selection distribution. A separate copy is made for each data
              set via copy_func(sel_dist, defaults=scal_fac).
    scal_fac1, scal_fac2: Per-data-set scaling factors passed to copy_func.
              NOTE(review): the original comments say scal_fac1 should be
              2*Nea of population 1 and scal_fac2 should be 4*Nea of
              population 2. The differing factors look inconsistent --
              TODO confirm.
    theta1, theta2: Forwarded to model_func for data set 1 / 2.
    lower_bound, upper_bound: Optional per-parameter limits. A None entry
                              means "no limit" for that parameter.
                              NOTE(review): only the vector built from
                              fixed_params1 is checked against them.
    verbose: If > 0, write a progress line every `verbose` calls, showing
             the parameter vector built from fixed_params1.
    multinom: If True use Inference.ll_multinom, otherwise Inference.ll.
    flush_delay: Passed to Misc.delayed_flush after each progress line.
    fixed_params1, fixed_params2: Passed to Inference._project_params_up
                                  together with params to build the full
                                  parameter vector of each data set.
    ll_scale: The returned value is divided by this factor.
    output_stream: Destination of the progress lines.
    store_thetas: Accepted but not used by this function.

    Returns -result / ll_scale. If any parameter violates a bound, returns
    -_out_of_bounds_val / ll_scale without evaluating the model. A NaN
    likelihood is replaced by _out_of_bounds_val.
    """
    global _counter
    _counter += 1

    # Each data set gets its own copy of sel_dist carrying its own scaling
    # factor.
    sel_dist1 = copy_func(sel_dist, defaults=scal_fac1)
    sel_dist2 = copy_func(sel_dist, defaults=scal_fac2)

    # Expand the optimizer's vector to each data set's full vector.
    params_up1 = Inference._project_params_up(params, fixed_params1)
    params_up2 = Inference._project_params_up(params, fixed_params2)

    # Reject out-of-range proposals before paying for a model evaluation.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(params_up1, lower_bound)):
        return -_out_of_bounds_val / ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(params_up1, upper_bound)):
        return -_out_of_bounds_val / ll_scale

    extra_args = list(func_args)
    model_args1 = [params_up1, data1.sample_sizes, sel_dist1, theta1,
                   cache1] + extra_args
    model_args2 = [params_up2, data2.sample_sizes, sel_dist2, theta2,
                   cache2] + extra_args

    sfs1 = model_func(*model_args1, **func_kwargs)
    sfs2 = model_func(*model_args2, **func_kwargs)

    # Add the two data sets' log-likelihoods.
    likelihood = Inference.ll_multinom if multinom else Inference.ll
    result = likelihood(sfs1, data1) + likelihood(sfs2, data2)

    # A NaN likelihood is treated like an out-of-bounds evaluation.
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join('%- 12g' % value for value in params_up1)
        param_str = 'array([%s])' % formatted
        output_stream.write('%-8i, %-12g, %s%s'
                            % (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale