Exemplo n.º 1
0
def _object_func(params,
                 data,
                 model_func,
                 sel_dist,
                 theta,
                 lower_bound=None,
                 upper_bound=None,
                 verbose=0,
                 multinom=False,
                 flush_delay=0,
                 func_args=[],
                 func_kwargs={},
                 fixed_params=None,
                 ll_scale=1,
                 output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function for optimization.

    Evaluates the model and returns the negated score, divided by ll_scale,
    so that a minimizer can be applied to it.

    params: Parameter vector being optimized. It is expanded with
            fixed_params via Inference._project_params_up before use.
    data: Observed spectrum. Its sample_sizes attribute is passed to the
          model.
    model_func: Callable invoked as
                model_func(params_up, ns, sel_dist, theta, *func_args,
                           **func_kwargs).
    sel_dist, theta: Passed through to model_func unchanged.
    lower_bound, upper_bound: Optional per-parameter bounds. A None entry
                              means that parameter is unbounded on that side.
    verbose: If > 0, a progress line is written every `verbose` calls.
    multinom: If True, score with Inference.ll_multinom, otherwise with
              Inference.ll.
    flush_delay: Forwarded to Misc.delayed_flush after a progress line.
    func_args, func_kwargs: Extra positional / keyword arguments for
                            model_func. Neither is modified here.
    fixed_params: Forwarded to Inference._project_params_up.
    ll_scale: Divisor applied to the returned value.
    output_stream: Stream that receives the progress lines.
    store_thetas: If True, record Inference.optimal_sfs_scaling(sfs, data)
                  in the module-level _theta_store, keyed by tuple(params).

    Returns -result/ll_scale. If any parameter violates its bound, the model
    is not evaluated and -_out_of_bounds_val/ll_scale is returned instead.
    A NaN score is replaced by _out_of_bounds_val before negation.

    Side effect: the module-level _counter is incremented on every call.
    """
    global _counter
    _counter += 1

    # Deal with fixed parameters
    params_up = Inference._project_params_up(params, fixed_params)

    # Check our parameter bounds. Bail out before the (expensive) model
    # evaluation if any parameter is outside its allowed range.
    if lower_bound is not None:
        for pval, bound in zip(params_up, lower_bound):
            if bound is not None and pval < bound:
                return -_out_of_bounds_val / ll_scale
    if upper_bound is not None:
        for pval, bound in zip(params_up, upper_bound):
            if bound is not None and pval > bound:
                return -_out_of_bounds_val / ll_scale

    ns = data.sample_sizes
    all_args = [params_up, ns, sel_dist, theta] + list(func_args)

    sfs = model_func(*all_args, **func_kwargs)
    if multinom:
        result = Inference.ll_multinom(sfs, data)
    else:
        result = Inference.ll(sfs, data)

    if store_thetas:
        global _theta_store
        # Every other helper in this function is reached through the
        # Inference namespace; the scaling helper is qualified the same way
        # so that store_thetas=True does not depend on a separate bare import.
        _theta_store[tuple(params)] = Inference.optimal_sfs_scaling(sfs, data)

    # Bad result
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if (verbose > 0) and (_counter % verbose == 0):
        param_str = 'array([%s])' % (', '.join(
            ['%- 12g' % v for v in params_up]))
        output_stream.write('%-8i, %-12g, %s%s' %
                            (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
def _object_func(params,
                 data,
                 model_func,
                 pts,
                 lower_bound=None,
                 upper_bound=None,
                 verbose=0,
                 multinom=True,
                 flush_delay=0,
                 func_args=[],
                 func_kwargs={},
                 fixed_params=None,
                 ll_scale=1,
                 output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function handed to the optimizers.

    The parameter vector is expanded with fixed_params, checked against the
    optional bounds, and fed to
    model_func(params_up, ns, *func_args, pts=pts, **func_kwargs).
    The resulting spectrum is scored against data with ll_multinom (when
    multinom is True) or ll, and the negated score divided by ll_scale is
    returned.

    A bound violation short-circuits the evaluation and yields
    -_out_of_bounds_val/ll_scale. A NaN score is replaced by
    _out_of_bounds_val. Every call increments the module-level _counter;
    every `verbose`-th call (when verbose > 0) writes a progress line to
    output_stream. With store_thetas set, optimal_sfs_scaling(sfs, data) is
    recorded in _theta_store under tuple(params).
    """
    global _counter, _theta_store
    _counter += 1

    # Re-insert any fixed parameters so the model sees the full vector.
    full_params = _project_params_up(params, fixed_params)

    # Lower bounds are examined first, then upper bounds; a None entry means
    # "no limit on this side" for that parameter.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params, lower_bound)):
        return -_out_of_bounds_val/ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params, upper_bound)):
        return -_out_of_bounds_val/ll_scale

    sample_sizes = data.sample_sizes
    # pts travels as a keyword argument. Work on a copy so the caller's
    # func_kwargs (possibly the shared default dict) is never modified.
    model_kwargs = func_kwargs.copy()
    model_kwargs['pts'] = pts
    sfs = model_func(full_params, sample_sizes, *func_args, **model_kwargs)

    scorer = ll_multinom if multinom else ll
    result = scorer(sfs, data)

    if store_thetas:
        _theta_store[tuple(params)] = optimal_sfs_scaling(sfs, data)

    # A NaN score is treated like an out-of-bounds evaluation.
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join(['%- 12g' % value for value in full_params])
        param_str = 'array([%s])' % formatted
        line = '%-8i, %-12g, %s%s' % (_counter, result, param_str,
                                      os.linesep)
        output_stream.write(line)
        Misc.delayed_flush(delay=flush_delay)

    return -result/ll_scale
#!/usr/bin/env python
import dadi
import pylab
import matplotlib.pyplot as plt
import numpy as np
from numpy import array
from dadi import Misc, Spectrum, Numerics, PhiManip, Integration, Demographics1D, Demographics2D
import sys
# Usage: <script> INFILE POP_ID PROJ_START PROJ_STOP
# For every projection size in [PROJ_START, PROJ_STOP), build the spectrum for
# POP_ID with polarized=False and print the projection size followed by the
# value of data.S().
infile = sys.argv[1]
popid = [sys.argv[2]]
# range() excludes its upper limit, so PROJ_STOP itself is not evaluated.
proj = range(int(sys.argv[3]), int(sys.argv[4]))
dd = Misc.make_data_dict(infile)
for size in proj:
    data = Spectrum.from_data_dict(dd,
                                   pop_ids=popid,
                                   projections=[size],
                                   polarized=False)
    # A single pre-formatted string argument prints the same
    # "<size> <S>" line under both Python 2 and Python 3; the former
    # print statement was a syntax error on Python 3.
    print('%s %s' % (size, data.S()))
Exemplo n.º 4
0
def four_pops(phi,
              xx,
              T,
              nu1=1,
              nu2=1,
              nu3=1,
              nu4=1,
              m12=0,
              m13=0,
              m14=0,
              m21=0,
              m23=0,
              m24=0,
              m31=0,
              m32=0,
              m34=0,
              m41=0,
              m42=0,
              m43=0,
              gamma1=0,
              gamma2=0,
              gamma3=0,
              gamma4=0,
              h1=0.5,
              h2=0.5,
              h3=0.5,
              h4=0.5,
              theta0=1,
              initial_t=0,
              frozen1=False,
              frozen2=False,
              frozen3=False,
              frozen4=False,
              enable_cuda_const=False):
    """
    Integrate a 4-dimensional phi forward.

    phi: Initial 4-dimensional phi
    xx: 1-dimensional grid upon (0,1) over which phi is defined. It is assumed
        that this grid is used in all dimensions.

    nu's, gamma's, m's, and theta0 may be functions of time.
    nu1,nu2,nu3,nu4: Population sizes
    gamma1,gamma2,gamma3,gamma4: Selection coefficients on *all* segregating
                                 alleles
    h1,h2,h3,h4: Dominance coefficients. h = 0.5 corresponds to genic selection.
    m12,m13,m21,m23,m31,m32, ...: Migration rates. Note that m12 is the rate
                             *into 1 from 2*.
    theta0: Proportional to ancestral size. Typically constant.

    T: Time at which to halt integration
    initial_t: Time at which to start integration. (Note that this only matters
               if one of the demographic parameters is a function of time.)

    frozen1,frozen2,frozen3,frozen4: If True, the implicit integration step
               along the corresponding axis is skipped and the flag is
               forwarded to the mutation-injection step. A frozen population
               must have all migration rates to and from it equal to zero.

    enable_cuda_const: Accepted for interface compatibility; this function
                       body does not use it (no CUDA path is wired up here).

    Returns the integrated phi. If T == initial_t the input phi is returned
    as is. Otherwise the integration runs on a copy, so the caller's array
    is not modified.

    Raises ValueError if T < initial_t, if a frozen population has non-zero
    migration, or if during integration a time, population size, migration
    rate or theta0 is negative or a population size is zero.

    Note: Generalizing to different grids in different phi directions is
          straightforward. The tricky part will be later doing the extrapolation
          correctly.
    """
    if T - initial_t == 0:
        return phi
    elif T - initial_t < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    # Each population paired with every migration rate into or out of it.
    # Population 4 is checked the same way as the other three.
    frozen_and_rates = (
        (frozen1, (m12, m21, m13, m31, m14, m41)),
        (frozen2, (m12, m21, m23, m32, m24, m42)),
        (frozen3, (m13, m31, m23, m32, m34, m43)),
        (frozen4, (m14, m41, m24, m42, m34, m43)),
    )
    for is_frozen, rates in frozen_and_rates:
        # Rates are only compared for populations that are actually frozen.
        if is_frozen and any(rate != 0 for rate in rates):
            raise ValueError('Population cannot be frozen and have non-zero '
                             'migration to or from it.')

    # The mutation-injection step below is called for its effect on phi (its
    # return value is not used), so integrate on a copy to leave the caller's
    # array untouched, as the one- and two-population integrators do.
    phi = phi.copy()

    aa = zz = yy = xx

    nu1_f, nu2_f = Misc.ensure_1arg_func(nu1), Misc.ensure_1arg_func(nu2)
    nu3_f, nu4_f = Misc.ensure_1arg_func(nu3), Misc.ensure_1arg_func(nu4)
    gamma1_f, gamma2_f = Misc.ensure_1arg_func(gamma1), Misc.ensure_1arg_func(
        gamma2)
    gamma3_f, gamma4_f = Misc.ensure_1arg_func(gamma3), Misc.ensure_1arg_func(
        gamma4)
    h1_f, h2_f = Misc.ensure_1arg_func(h1), Misc.ensure_1arg_func(h2)
    h3_f, h4_f = Misc.ensure_1arg_func(h3), Misc.ensure_1arg_func(h4)
    m12_f, m13_f, m14_f = Misc.ensure_1arg_func(m12), Misc.ensure_1arg_func(
        m13), Misc.ensure_1arg_func(m14)
    m21_f, m23_f, m24_f = Misc.ensure_1arg_func(m21), Misc.ensure_1arg_func(
        m23), Misc.ensure_1arg_func(m24)
    m31_f, m32_f, m34_f = Misc.ensure_1arg_func(m31), Misc.ensure_1arg_func(
        m32), Misc.ensure_1arg_func(m34)
    m41_f, m42_f, m43_f = Misc.ensure_1arg_func(m41), Misc.ensure_1arg_func(
        m42), Misc.ensure_1arg_func(m43)
    theta0_f = Misc.ensure_1arg_func(theta0)

    # Parameter values at the starting time; they determine the first dt.
    current_t = initial_t
    nu1, nu2, nu3, nu4 = nu1_f(current_t), nu2_f(current_t), nu3_f(
        current_t), nu4_f(current_t)
    gamma1, gamma2, gamma3, gamma4 = gamma1_f(current_t), gamma2_f(
        current_t), gamma3_f(current_t), gamma4_f(current_t)
    h1, h2, h3, h4 = h1_f(current_t), h2_f(current_t), h3_f(current_t), h4_f(
        current_t)
    m12, m13, m14 = m12_f(current_t), m13_f(current_t), m14_f(current_t)
    m21, m23, m24 = m21_f(current_t), m23_f(current_t), m24_f(current_t)
    m31, m32, m34 = m31_f(current_t), m32_f(current_t), m34_f(current_t)
    m41, m42, m43 = m41_f(current_t), m42_f(current_t), m43_f(current_t)

    dx, dy, dz, da = numpy.diff(xx), numpy.diff(yy), numpy.diff(
        zz), numpy.diff(aa)
    while current_t < T:
        # Use the smallest timestep requested by any of the four axes, and
        # never step past T.
        dt = min(_compute_dt(dx, nu1, [m12, m13, m14], gamma1, h1),
                 _compute_dt(dy, nu2, [m21, m23, m24], gamma2, h2),
                 _compute_dt(dz, nu3, [m31, m32, m34], gamma3, h3),
                 _compute_dt(da, nu4, [m41, m42, m43], gamma4, h4))
        this_dt = min(dt, T - current_t)

        next_t = current_t + this_dt

        # The step itself uses the parameters evaluated at the *next* time.
        nu1, nu2, nu3, nu4 = nu1_f(next_t), nu2_f(next_t), nu3_f(
            next_t), nu4_f(next_t)
        gamma1, gamma2, gamma3, gamma4 = gamma1_f(next_t), gamma2_f(
            next_t), gamma3_f(next_t), gamma4_f(next_t)
        h1, h2, h3, h4 = h1_f(next_t), h2_f(next_t), h3_f(next_t), h4_f(next_t)
        m12, m13, m14 = m12_f(next_t), m13_f(next_t), m14_f(next_t)
        m21, m23, m24 = m21_f(next_t), m23_f(next_t), m24_f(next_t)
        m31, m32, m34 = m31_f(next_t), m32_f(next_t), m34_f(next_t)
        m41, m42, m43 = m41_f(next_t), m42_f(next_t), m43_f(next_t)
        theta0 = theta0_f(next_t)

        if numpy.any(
                numpy.less([
                    T, nu1, nu2, nu3, nu4, m12, m13, m14, m21, m23, m24, m31,
                    m32, m34, m41, m42, m43, theta0
                ], 0)):
            raise ValueError(
                'A time, population size, migration rate, or '
                'theta0 is < 0. Has the model been mis-specified?')
        if numpy.any(numpy.equal([nu1, nu2, nu3, nu4], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_4D(phi, this_dt, xx, yy, zz, aa, theta0, frozen1,
                             frozen2, frozen3, frozen4)
        # One implicit step per axis; frozen populations are skipped.
        if not frozen1:
            phi = int_c.implicit_4Dx(phi, xx, yy, zz, aa, nu1, m12, m13, m14,
                                     gamma1, h1, this_dt, use_delj_trick)
        if not frozen2:
            phi = int_c.implicit_4Dy(phi, xx, yy, zz, aa, nu2, m21, m23, m24,
                                     gamma2, h2, this_dt, use_delj_trick)
        if not frozen3:
            phi = int_c.implicit_4Dz(phi, xx, yy, zz, aa, nu3, m31, m32, m34,
                                     gamma3, h3, this_dt, use_delj_trick)
        if not frozen4:
            phi = int_c.implicit_4Da(phi, xx, yy, zz, aa, nu4, m41, m42, m43,
                                     gamma4, h4, this_dt, use_delj_trick)

        current_t = next_t
    return phi
Exemplo n.º 5
0
def two_pops(phi, xx, T, nu1=1, nu2=1, m12=0, m21=0, gamma1=0, gamma2=0,
             h1=0.5, h2=0.5, theta0=1, initial_t=0, frozen1=False,
             frozen2=False, nomut1=False, nomut2=False,
             enable_cuda_const=False):
    """
    Integrate a 2-dimensional phi forward in time.

    phi: Initial 2-dimensional phi. It is copied; the input is not modified.
    xx: 1-dimensional grid upon (0,1) over which phi is defined. The same
        grid is used for both dimensions.

    The nu's, gamma's, h's, m's and theta0 may each be constants or functions
    of time.
    nu1,nu2: Population sizes
    gamma1,gamma2: Selection coefficients on *all* segregating alleles
    h1,h2: Dominance coefficients. h = 0.5 corresponds to genic selection.
    m12,m21: Migration rates. Note that m12 is the rate *into 1 from 2*.
    theta0: Proportional to ancestral size. Typically constant.

    T: Time at which to halt integration
    initial_t: Time at which to start integration. (This only matters if one
               of the demographic parameters is a function of time.)

    frozen1,frozen2: If True, the corresponding population is "frozen" in time
                     (no new mutations and no drift), so the resulting spectrum
                     will correspond to an ancient DNA sample from that
                     population.

    nomut1,nomut2: If True, no new mutations will be introduced into the
                   given population.

    enable_cuda_const: If True, enable CUDA integration with slower constant
                       parameter method. Likely useful only for benchmarking.

    Raises ValueError when T < initial_t, when either population is frozen
    while a migration rate is non-zero, or when a time, size, migration rate
    or theta0 turns out negative (or a size zero) during integration.

    Note: Generalizing to different grids in different phi directions is
          straightforward. The tricky part will be later doing the extrapolation
          correctly.
    """
    phi = phi.copy()

    if T - initial_t == 0:
        return phi
    if T - initial_t < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    # Migration rates are only inspected when a population is frozen.
    if frozen1 or frozen2:
        if m12 != 0 or m21 != 0:
            raise ValueError('Population cannot be frozen and have non-zero '
                             'migration to or from it.')

    maybe_constant = (nu1, nu2, m12, m21, gamma1, gamma2, h1, h2, theta0)
    if all(numpy.isscalar(value) for value in maybe_constant):
        # Constant-parameter integration is slower under CUDA, so with CUDA
        # active it is only taken when explicitly requested.
        if not cuda_enabled or enable_cuda_const:
            return _two_pops_const_params(phi, xx, T, nu1, nu2, m12, m21,
                                          gamma1, gamma2, h1, h2, theta0,
                                          initial_t, frozen1, frozen2, nomut1,
                                          nomut2)

    yy = xx

    # Wrap every parameter so it can be evaluated as a function of time.
    as_func = Misc.ensure_1arg_func
    nu1_f = as_func(nu1)
    nu2_f = as_func(nu2)
    m12_f = as_func(m12)
    m21_f = as_func(m21)
    gamma1_f = as_func(gamma1)
    gamma2_f = as_func(gamma2)
    h1_f = as_func(h1)
    h2_f = as_func(h2)
    theta0_f = as_func(theta0)

    if cuda_enabled:
        import dadi.cuda
        return dadi.cuda.Integration._two_pops_temporal_params(
            phi, xx, T, initial_t, nu1_f, nu2_f, m12_f, m21_f, gamma1_f,
            gamma2_f, h1_f, h2_f, theta0_f, frozen1, frozen2, nomut1, nomut2)

    # Values at the starting time; these size the first timestep.
    t_now = initial_t
    nu1_t, nu2_t = nu1_f(t_now), nu2_f(t_now)
    m12_t, m21_t = m12_f(t_now), m21_f(t_now)
    gamma1_t, gamma2_t = gamma1_f(t_now), gamma2_f(t_now)
    h1_t, h2_t = h1_f(t_now), h2_f(t_now)
    dx, dy = numpy.diff(xx), numpy.diff(yy)

    while t_now < T:
        # Smallest step requested by either axis, clipped so we stop at T.
        suggested_dt = min(_compute_dt(dx, nu1_t, [m12_t], gamma1_t, h1_t),
                           _compute_dt(dy, nu2_t, [m21_t], gamma2_t, h2_t))
        step = min(suggested_dt, T - t_now)
        t_next = t_now + step

        # The step is taken with the parameters evaluated at the next time.
        nu1_t, nu2_t = nu1_f(t_next), nu2_f(t_next)
        m12_t, m21_t = m12_f(t_next), m21_f(t_next)
        gamma1_t, gamma2_t = gamma1_f(t_next), gamma2_f(t_next)
        h1_t, h2_t = h1_f(t_next), h2_f(t_next)
        theta0_t = theta0_f(t_next)

        must_be_nonneg = [T, nu1_t, nu2_t, m12_t, m21_t, theta0_t]
        if numpy.any(numpy.less(must_be_nonneg, 0)):
            raise ValueError(
                'A time, population size, migration rate, or '
                'theta0 is < 0. Has the model been mis-specified?')
        if numpy.any(numpy.equal([nu1_t, nu2_t], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_2D(phi, step, xx, yy, theta0_t, frozen1, frozen2,
                             nomut1, nomut2)
        if not frozen1:
            phi = int_c.implicit_2Dx(phi, xx, yy, nu1_t, m12_t, gamma1_t,
                                     h1_t, step, use_delj_trick)
        if not frozen2:
            phi = int_c.implicit_2Dy(phi, xx, yy, nu2_t, m21_t, gamma2_t,
                                     h2_t, step, use_delj_trick)

        t_now = t_next

    return phi
Exemplo n.º 6
0
def one_pop(phi, xx, T, nu=1, gamma=0, h=0.5, theta0=1.0, initial_t=0,
            frozen=False, beta=1):
    """
    Integrate a 1-dimensional phi forward in time.

    phi: Initial 1-dimensional phi. It is copied; the input is not modified.
    xx: Grid upon (0,1) over which phi is defined.

    nu, gamma, h, theta0 and beta may each be constants or functions of time.
    nu: Population size
    gamma: Selection coefficient on *all* segregating alleles
    h: Dominance coefficient. h = 0.5 corresponds to genic selection.
       Heterozygotes have fitness 1+2sh and homozygotes have fitness 1+2s.
    theta0: Proportional to ancestral size. Typically constant.
    beta: Breeding ratio, beta=Nf/Nm.

    T: Time at which to halt integration
    initial_t: Time at which to start integration. (This only matters if one
               of the demographic parameters is a function of time.)

    frozen: If True, population is 'frozen' so that it does not change.
            In the one_pop case, this is equivalent to not running the
            integration at all; the copy of phi is returned before any other
            argument is examined.

    Raises ValueError when T < initial_t, or when during integration a time,
    population size or theta0 is negative or the population size is zero.
    """
    phi = phi.copy()

    # Freezing a single population simply means skipping the integration.
    # Nothing to integrate over a zero-length interval either.
    if frozen or T - initial_t == 0:
        return phi
    if T - initial_t < 0:
        raise ValueError('Final integration time T (%f) is less than '
                         'intial_time (%f). Integration cannot be run '
                         'backwards.' % (T, initial_t))

    # All-constant parameters are handled by the dedicated routine.
    if all(numpy.isscalar(value) for value in (nu, gamma, h, theta0, beta)):
        return _one_pop_const_params(phi, xx, T, nu, gamma, h, theta0,
                                     initial_t, beta)

    # Wrap every parameter so it can be evaluated as a function of time.
    as_func = Misc.ensure_1arg_func
    nu_f = as_func(nu)
    gamma_f = as_func(gamma)
    h_f = as_func(h)
    theta0_f = as_func(theta0)
    beta_f = as_func(beta)

    t_now = initial_t
    nu_t, gamma_t, h_t = nu_f(t_now), gamma_f(t_now), h_f(t_now)
    beta_t = beta_f(t_now)
    dx = numpy.diff(xx)

    while t_now < T:
        # dt is estimated from the previous timepoint's nu, gamma, h, and
        # clipped so the final step lands exactly on T.
        step = min(_compute_dt(dx, nu_t, [0], gamma_t, h_t), T - t_now)

        # The method is implicit, so the step itself needs the parameters of
        # the *next* time.
        t_next = t_now + step
        nu_t, gamma_t, h_t = nu_f(t_next), gamma_f(t_next), h_f(t_next)
        beta_t = beta_f(t_next)
        theta0_t = theta0_f(t_next)

        if numpy.any(numpy.less([T, nu_t, theta0_t], 0)):
            raise ValueError(
                'A time, population size, migration rate, or '
                'theta0 is < 0. Has the model been mis-specified?')
        if numpy.any(numpy.equal([nu_t], 0)):
            raise ValueError('A population size is 0. Has the model been '
                             'mis-specified?')

        _inject_mutations_1D(phi, step, xx, theta0_t)
        # Each step is done in C, where building the a,b,c matrices is
        # faster.
        phi = int_c.implicit_1Dx(phi, xx, nu_t, gamma_t, h_t, beta_t, step,
                                 use_delj_trick=use_delj_trick)
        t_now = t_next

    return phi
Exemplo n.º 7
0
import numpy
import dadi
import Plotting_Functions
from dadi import Numerics, PhiManip, Integration, Misc
from dadi.Spectrum_mod import Spectrum

# ---------------------------------------------------------------------------
# Load the SNP data and build the joint site frequency spectrum
# ---------------------------------------------------------------------------

# USER SETTING: location of the SNPs input file.
snps = "/Users/portik/Documents/GitHub/dadi_pipeline/Two_Population_Pipeline/Example_Data/dadi_2pops_North_South_snps.txt"

# Parse the SNPs file into a Python dictionary.
dd = Misc.make_data_dict(snps)

# USER SETTING: population labels; these should match the population headers
# of the columns in the SNPs file.
pop_ids = ["North", "South"]

# USER SETTING: projection sizes, counted in ALLELES, not individuals.
proj = [16, 32]

# Turn the dictionary into an AFS object. polarized=False requests a folded
# spectrum.
fs = Spectrum.from_data_dict(
    dd, pop_ids=pop_ids, projections=proj, polarized=False)
Exemplo n.º 8
0
def _object_func(params, data1, data2, cache1, cache2, model_func, sel_dist,
                 scal_fac1, scal_fac2, theta1, theta2,
                 lower_bound=None, upper_bound=None,
                 verbose=0, multinom=False, flush_delay=0,
                 func_args=[], func_kwargs={},
                 fixed_params1=None, fixed_params2=None,
                 ll_scale=1, output_stream=sys.stdout, store_thetas=False):
    """
    Joint objective function for optimizing over two datasets.

    The same parameter vector is expanded twice (with fixed_params1 and with
    fixed_params2) and model_func is evaluated once per dataset as
    model_func(params_up, ns, sel_dist_i, theta_i, cache_i, *func_args,
               **func_kwargs),
    where sel_dist_i is copy_func(sel_dist, defaults=scal_fac_i). The two
    scores (Inference.ll_multinom when multinom is True, otherwise
    Inference.ll) are summed, and the negated sum divided by ll_scale is
    returned.

    Bounds are checked against the first expanded vector only; a violation
    returns -_out_of_bounds_val/ll_scale without evaluating the model. A NaN
    sum is replaced by _out_of_bounds_val. The module-level _counter is
    incremented on every call, and every `verbose`-th call (verbose > 0)
    writes a progress line showing the first expanded vector to
    output_stream.

    store_thetas is accepted but not used in this function.

    NOTE(review): the earlier comments described scal_fac1 as 2*Nea of
    population 1 but scal_fac2 as 4*Nea of population 2 -- confirm which
    factor is intended.
    """
    global _counter
    _counter += 1

    # Each species gets its own copy of sel_dist, differing only in the
    # scaling factor installed as its defaults.
    sel_dist1 = copy_func(sel_dist, defaults=scal_fac1)
    sel_dist2 = copy_func(sel_dist, defaults=scal_fac2)

    # Re-insert the fixed parameters, separately for each dataset.
    full_params1 = Inference._project_params_up(params, fixed_params1)
    full_params2 = Inference._project_params_up(params, fixed_params2)

    # Lower bounds first, then upper bounds; None entries are unbounded.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params1, lower_bound)):
        return -_out_of_bounds_val / ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params1, upper_bound)):
        return -_out_of_bounds_val / ll_scale

    sizes1 = data1.sample_sizes
    sizes2 = data2.sample_sizes
    # No pts keyword is injected here; func_kwargs is forwarded untouched.
    sfs1 = model_func(full_params1, sizes1, sel_dist1, theta1, cache1,
                      *func_args, **func_kwargs)
    sfs2 = model_func(full_params2, sizes2, sel_dist2, theta2, cache2,
                      *func_args, **func_kwargs)

    scorer = Inference.ll_multinom if multinom else Inference.ll
    result = scorer(sfs1, data1) + scorer(sfs2, data2)

    # A NaN sum is treated like an out-of-bounds evaluation.
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join(['%- 12g' % value for value in full_params1])
        param_str = 'array([%s])' % formatted
        line = '%-8i, %-12g, %s%s' % (_counter, result, param_str,
                                      os.linesep)
        output_stream.write(line)
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale