Exemplo n.º 1
0
    def test_from_file(self):
        """
        Loading spectrum from file.

        Round-trips a Spectrum through to_file/from_file three ways (default
        format, old format written with foldmaskinfo=False, and a folded
        spectrum with an extra masked entry) and checks that data, mask and
        folded flag survive; comment lines are checked for the first two.
        """
        commentsin = ['comment 1', 'comment 2']
        filename = 'test.fs'
        data = numpy.random.rand(3, 3)

        def remove_scratch_file():
            # Guarded so that a failed write does not hide the real error
            # behind a "file not found" from os.remove.
            if os.path.exists(filename):
                os.remove(filename)

        def check_spectrum(fsout, fsin):
            # Ensure that fs was read correctly.
            self.assertTrue(numpy.allclose(fsout.data, fsin.data))
            self.assertTrue(numpy.all(fsout.mask == fsin.mask))
            self.assertEqual(fsout.folded, fsin.folded)

        fsin = dadi.Spectrum(data)

        # First the current file format, then the old one.
        for format_kwargs in ({}, {'foldmaskinfo': False}):
            try:
                fsin.to_file(filename, comment_lines=commentsin,
                             **format_kwargs)
                # Read the file.
                fsout, commentsout = dadi.Spectrum.from_file(
                    filename, return_comments=True)
            finally:
                remove_scratch_file()

            check_spectrum(fsout, fsin)
            # Ensure comments were read correctly.
            for ii, line in enumerate(commentsin):
                self.assertEqual(line, commentsout[ii])

        #
        # Now test a file with folding and masking
        #
        fsin = dadi.Spectrum(data).fold()
        fsin.mask[0, 1] = True
        try:
            fsin.to_file(filename)
            fsout = dadi.Spectrum.from_file(filename)
        finally:
            remove_scratch_file()

        check_spectrum(fsout, fsin)
Exemplo n.º 2
0
def intersect_masks(m1, m2):
    """
    Return m1 and m2 carrying one shared joint mask.

    When neither argument is a masked array, both are handed back untouched.
    Otherwise each is wrapped as a dadi.Spectrum whose mask is its own copy
    of the element-wise OR of the two input masks, so an entry masked in
    either input ends up masked in both outputs.
    """
    import dadi
    if not (numpy.ma.isMaskedArray(m1) or numpy.ma.isMaskedArray(m2)):
        return m1, m2

    combined = numpy.ma.mask_or(numpy.ma.getmask(m1), numpy.ma.getmask(m2))
    # Separate copies so later edits to one mask do not leak into the other.
    return (dadi.Spectrum(m1, mask=combined.copy()),
            dadi.Spectrum(m2, mask=combined.copy()))
Exemplo n.º 3
0
def _generate_dadi_fs(neu_fs, nonneu_fs, output):
    """
    Description:
        Writes two frequency spectra to disk in dadi's file format.

    Arguments:
        neu_fs numpy.ndarray: Frequency spectrum for neutral mutations.
        nonneu_fs numpy.ndarray: Frequency spectrum for non-neutral mutations.
        output list: Names of output files; output[0] receives the neutral
            spectrum and output[1] the non-neutral one.
    """
    # Build both Spectrum objects before touching the file system.
    spectra = [dadi.Spectrum(raw) for raw in (neu_fs, nonneu_fs)]
    for position, spectrum in enumerate(spectra):
        spectrum.to_file(output[position])
Exemplo n.º 4
0
    def test_unfolding(self):
        """
        Fold and then unfold a 2D spectrum that carries extra masked entries.
        """
        ns = (3, 4)
        last_row = ns[0] - 1
        last_col = ns[1] - 1

        # Start from a random spectrum with some unusual masking.
        original = dadi.Spectrum(numpy.random.uniform(size=ns))
        original.mask[0, 1] = True
        original.mask[1, 1] = True

        refolded = original.fold()
        restored = refolded.unfold()

        # The result must be flagged as unfolded.
        self.assertFalse(restored.folded)

        # No data may be lost along the way.
        self.assertAlmostEqual(original.data.sum(), refolded.data.sum())
        self.assertAlmostEqual(original.data.sum(), restored.data.sum())

        # original.sum() need not equal refolded.sum() when the original had
        # masked values, so only folded and unfolded sums are compared.
        self.assertAlmostEqual(refolded.sum(), restored.sum())

        # Each originally masked entry and its mirror image must be masked.
        expected_masked = [
            (0, 1),
            (last_row, last_col - 1),
            (1, 1),
            (last_row - 1, last_col - 1),
        ]
        for entry in expected_masked:
            self.assertTrue(restored.mask[entry])
Exemplo n.º 5
0
def dadi_multiSFS(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Build the joint SFS over all populations and write it in two formats.

    The spectrum comes from dadi.Spectrum.from_data_dict for `pops` at the
    projections `proj` (polarized when `unfold` is true). With
    dtype == "int" the bins are rounded and stored as integers. The result
    is written to <outdir>/dadi/<pops joined by '-'>.sfs, and the second
    line of that file is copied into
    <outdir>/fastsimcoal2/<prefix>_MSFS.obs below a two-line header.

    Returns the path of the dadi-format file.
    """
    print("Doing multiSFS for all pops")
    dadi_path = os.path.join(outdir, "dadi", "-".join(pops) + ".sfs")

    ## Get the multiSFS
    spectrum = dadi.Spectrum.from_data_dict(dd, pops, proj, polarized=unfold)

    ## Do int bins rather than float
    if dtype == "int":
        rounded = np.rint(np.array(spectrum.data))
        spectrum = dadi.Spectrum(rounded,
                                 data_folded=spectrum.folded,
                                 mask=spectrum.mask,
                                 fill_value=0,
                                 dtype=int)

    ## Write out the dadi file
    spectrum.to_file(dadi_path)

    ## Convert to fsc multiSFS format
    fsc_path = os.path.join(outdir, "fastsimcoal2", prefix + "_MSFS.obs")
    with open(fsc_path, 'w') as obs:
        obs.write(
            "1 observations. No. of demes and sample sizes are on next line.\n"
        )
        sizes = " ".join(str(size) for size in proj)
        obs.write("{}\t{}\n".format(len(pops), sizes))
        with open(dadi_path) as dadi_file:
            dadi_lines = dadi_file.readlines()
        # Copy the second line of the dadi file, then one extra newline.
        obs.write(dadi_lines[1])
        obs.write("\n")
    return dadi_path
Exemplo n.º 6
0
def dadi_oneD_sfs_per_pop(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Write one 1D SFS per population, in dadi and fastsimcoal2 formats.

    For population i the spectrum is built by dadi.Spectrum.from_data_dict
    at projection proj[i] (corners masked, polarized when `unfold` is true)
    and rounded to integer bins when dtype == "int". It is written to
    <outdir>/dadi/<pop>-<proj[i]>.sfs, and the second line of that file is
    copied into <outdir>/fastsimcoal2/<pop>_DAFpop0.obs (unfolded) or
    <pop>_MAFpop0.obs (folded) below a two-line header.

    `prefix` is accepted but not used by this function.
    """
    dadi_dir = os.path.join(outdir, "dadi")
    fsc_dir = os.path.join(outdir, "fastsimcoal2")
    if unfold:
        allele_tag = "D"
    else:
        allele_tag = "M"

    for idx, pop in enumerate(pops):
        print("Doing 1D sfs - {}".format(pop))
        size = proj[idx]
        dadi_path = os.path.join(dadi_dir, pop + "-" + str(size) + ".sfs")

        spectrum = dadi.Spectrum.from_data_dict(dd, [pop], [size],
                                                mask_corners=True,
                                                polarized=unfold)

        ## Do int bins rather than float
        if dtype == "int":
            rounded = np.rint(np.array(spectrum.data))
            spectrum = dadi.Spectrum(rounded,
                                     data_folded=spectrum.folded,
                                     mask=spectrum.mask,
                                     fill_value=0,
                                     dtype=int)

        spectrum.to_file(dadi_path)

        ## Convert each 1D sfs to fsc format
        fsc_path = os.path.join(fsc_dir,
                                pop + "_" + allele_tag + "AFpop0.obs")
        with open(fsc_path, 'w') as obs:
            obs.write("1 observation\n")
            column_labels = ["d0_" + str(k) for k in range(proj[idx] + 1)]
            obs.write("\t".join(column_labels) + "\n")
            ## Grab the fs data from the dadi sfs
            with open(dadi_path) as dadi_file:
                dadi_lines = dadi_file.readlines()
            obs.write(dadi_lines[1])
            obs.write("\n")
Exemplo n.º 7
0
def compare_msprime_dadi_OutOfAfrica(input_fids, output_path, sample_size=20):
    """
    Plot a dadi Out-of-Africa model against the summed msprime joint SFS.

    Arguments:
        input_fids: Iterable of dadi-format SFS files; their spectra are
            added together to form the data.
        output_path: Where the comparison figure is saved.
        sample_size: Number of samples per population. The data grid is
            (sample_size + 1) x (sample_size + 1), and the model is now
            evaluated at the same sample size so the two always have
            matching shapes (previously the model was fixed at 20).
    """
    #For parameter reference
    #p0 = [nuAf, nuB, nuEu0, nuEu, nuAs0, nuAs, mAfB, mAfEu, mAfAs, mEuAs, TAf, TB, TEuAs]
    OoA_popt = [
        1.68, 0.287, 0.129, 3.74, 0.070, 7.29, 3.65, 0.44, 0.28, 1.40, 0.607,
        0.396, 0.058
    ]
    # NOTE(review): the integration grid sizes stay fixed; confirm they are
    # adequate before using sample sizes much larger than the default.
    OoA_pts_l = [30, 40, 50]
    OoA_ns = [sample_size] * 3
    OoA_extrap_func = dadi.Numerics.make_extrap_func(OoA_func)
    OoA_model = OoA_extrap_func(OoA_popt, OoA_ns, OoA_pts_l)
    # Drop the third population so the model is 2D like the data.
    OoA_model = OoA_model.marginalize([2])

    msprime_joint_sfs = dadi.Spectrum([[0] * (sample_size + 1)] *
                                      (sample_size + 1))

    for fid in input_fids:
        msprime_joint_sfs += dadi.Spectrum.from_file(fid)

    fig = plt.figure(219033)
    fig.clear()
    dadi.Plotting.plot_2d_comp_multinom(OoA_model,
                                        msprime_joint_sfs,
                                        vmin=1,
                                        resid_range=50,
                                        show=False)
    fig.savefig(output_path)
Exemplo n.º 8
0
    def test_1d_ic(self):
        """
        The extrapolated 1D standard neutral model matches 1/i to atol=1e-3.
        """
        # This is just the standard neutral model
        func_ex = dadi.Numerics.make_extrap_log_func(dadi.Demographics1D.snm)
        fs = func_ex([], (17, ), [100, 120, 140])

        answer = dadi.Spectrum(1. / numpy.arange(18))

        # assertTrue replaces assert_, an alias removed in Python 3.12.
        self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-3))
Exemplo n.º 9
0
def parse_fold_sfs(sfs, sampleSize=0, fold=True, maskSingletons=False):
    """
    Wrap a list of SFS counts as a dadi.Spectrum with a matching mask.

    Arguments:
        sfs: List of counts starting at the singleton class. A 0. entry for
            the zero class is prepended here.
        sampleSize: Sample size. With fold=True it must exceed len(sfs) and
            the data are zero-padded (and masked) up to sampleSize + 1
            entries. With fold=False it must equal len(sfs) + 1.
        fold: If true, `sfs` must already be a folded SFS and the result is
            built with data_folded=True; otherwise data_folded=False.
        maskSingletons: If true, the singleton class is masked in addition
            to the zero class.

    Returns:
        The dadi.Spectrum, or an error message string when sampleSize is
        inconsistent with len(sfs) (kept for backward compatibility).
    """
    n_classes = len(sfs)
    if fold:
        n_trailing = sampleSize - n_classes
        if n_trailing < 1:
            return "Wrong use of parse_fold_sfs function!"
    else:
        if sampleSize != n_classes + 1:
            return "sampleSize not equal 1+length(sfs)!"
        # A single trailing entry for the fixed class.
        n_trailing = 1

    data = [0.] + sfs + [0.] * n_trailing

    # Leading masked entries: the zero class, plus singletons on request
    # (capped so that an empty sfs cannot make the mask longer than data).
    n_leading = min(2, n_classes + 1) if maskSingletons else 1
    # The trailing entries are masked in both branches. The unfolded branch
    # used to omit this, leaving the mask one element shorter than the data.
    mask = ([True] * n_leading + [False] * (n_classes + 1 - n_leading) +
            [True] * n_trailing)
    return dadi.Spectrum(data, data_folded=bool(fold), mask=mask)
Exemplo n.º 10
0
    def test_1d_stationary(self):
        """
        A two-epoch model left to equilibrate matches the stationary result.
        """
        func_ex = dadi.Numerics.\
                make_extrap_log_func(dadi.Demographics1D.two_epoch)
        # We let a two-epoch model equilibrate for tau=10, which should
        # eliminate almost all traces of the size change.
        fs = func_ex((0.5, 10), (17, ), [40, 50, 60])
        answer = dadi.Spectrum(0.5 / numpy.arange(18))

        # assertTrue replaces assert_, an alias removed in Python 3.12.
        self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-2))
Exemplo n.º 11
0
def ts_to_dadi_sfs(ts_path,
                   out_path,
                   out_path_nonvariant,
                   sample_size=20,
                   mask_file=None):
    '''
    Convert a saved tree sequence into two-population joint SFS files for dadi.

    Arguments:
        ts_path: Path of the tree sequence, loaded with tskit.load. The file
            name up to its first "." is used as the chromosome name when
            filtering the mask table.
        out_path: Output file for the joint SFS.
        out_path_nonvariant: Output file for the same SFS with the [0, 0]
            entry set to the number of non-variant sites.
        sample_size: Number of haplotypes belonging to the first population;
            the remaining columns form the second population.
        mask_file: Optional tab-separated file without header whose columns
            0, 1, 2 are chromosome, interval start and interval end. Sites
            inside these intervals are dropped and the interval lengths are
            subtracted from the total sequence length.
    '''
    ts = tskit.load(ts_path)
    haps = ts.genotype_matrix()
    total_length = ts.sequence_length

    # Masking. One flag per site, because the genotype matrix has one row
    # per site (not per mutation). True means "inside a masked interval".
    masked = np.full(ts.num_sites, False)
    if mask_file:
        mask_table = pd.read_csv(mask_file, sep="\t", header=None)
        chrom = ts_path.split("/")[-1].split(".")[0]
        sub = mask_table[mask_table[0] == chrom]
        mask_ints = pd.IntervalIndex.from_arrays(sub[1], sub[2])
        # Site positions are read directly; decoding genotypes via
        # ts.variants() is not needed for this.
        snp_locs = [int(site.position) for site in ts.sites()]
        # IntervalIndex.contains answers per interval in current pandas, so
        # reduce with np.any to one flag per site (a scalar answer from
        # older pandas passes through np.any unchanged).
        in_mask = [bool(np.any(mask_ints.contains(loc))) for loc in snp_locs]
        masked = np.logical_or(masked, np.array(in_mask, dtype=bool))
        total_length -= np.sum(mask_ints.length)

    retain = np.logical_not(masked)
    haps_pops_joint = np.array(haps[retain, :])

    #Break up the haplotypes into separate populations based on sample_size
    haps_pop0_joint = haps_pops_joint[:, :sample_size]
    haps_pop1_joint = haps_pops_joint[:, sample_size:]

    genotypes_pop0_joint = allel.HaplotypeArray(haps_pop0_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop0_joint = genotypes_pop0_joint.count_alleles()
    genotypes_pop1_joint = allel.HaplotypeArray(haps_pop1_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop1_joint = genotypes_pop1_joint.count_alleles()

    sfs_joint = allel.joint_sfs(allele_counts_pop0_joint[:, 1],
                                allele_counts_pop1_joint[:, 1])
    num_sites = np.sum(sfs_joint)
    sfs_joint = dadi.Spectrum(sfs_joint)
    sfs_joint.to_file(out_path)
    # need to get the number of nonvariant sites for the [0,0] entry
    sfs_joint[0, 0] = total_length - num_sites
    sfs_joint.to_file(out_path_nonvariant)
Exemplo n.º 12
0
    def test_marginalize(self):
        """
        Marginalizing one population out of a 3D spectrum.

        Compares Spectrum.marginalize with a manual sum over the same axis,
        and checks that folding and marginalizing commute.
        """
        ns = (7, 8, 6)

        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        folded = fs.fold()

        marg1 = fs.marginalize([1])
        # Do manual marginalization.
        manual = dadi.Spectrum(fs.data.sum(axis=1))

        # Check that these are equal in the unmasked entries.
        # (assertTrue replaces assert_, an alias removed in Python 3.12.)
        self.assertTrue(
            numpy.allclose(numpy.where(marg1.mask, 0, marg1.data),
                           numpy.where(manual.mask, 0, manual.data)))

        # Check folded Spectrum objects. I should get the same result if I
        # marginalize then fold, as if I fold then marginalize.
        mf1 = marg1.fold()
        mf2 = folded.marginalize([1])
        self.assertTrue(numpy.allclose(mf1, mf2))
Exemplo n.º 13
0
    def marginalB(self):
        """
        Marginal 1D frequency spectrum for B locus.

        Entry k of the result accumulates self[i, :, j].sum() over every
        pair (i, j) with i + j == k that the loops visit. The pair indices
        are named after the original loop variables fAB and faB; presumably
        they count AB and aB haplotypes (confirm against the class
        definition). The extrapolation attributes are copied from self.
        """
        ns = self.shape[0] - 1
        marginal = dadi.Spectrum(np.zeros(ns + 1))
        for n_AB in range(ns):
            # n_aB stops before ns - n_AB, so n_AB + n_aB never reaches ns.
            for n_aB in range(ns - n_AB):
                b_count = n_AB + n_aB
                marginal[b_count] += self[n_AB, :, n_aB].sum()

        marginal.extrap_x = self.extrap_x
        marginal.extrap_t = self.extrap_t
        return marginal
Exemplo n.º 14
0
    def test_folded_slices(self):
        """
        Slices of a Spectrum keep the folded flag of the spectrum they
        were taken from.
        """
        ns = (3, 4)
        fs1 = dadi.Spectrum(numpy.random.rand(*ns))
        folded1 = fs1.fold()

        # assertTrue/assertFalse replace assert_, an alias removed in
        # Python 3.12.
        self.assertFalse(fs1[:].folded)
        self.assertTrue(folded1[:].folded)

        self.assertFalse(fs1[0].folded)
        self.assertTrue(folded1[1].folded)

        self.assertFalse(fs1[:, 0].folded)
        self.assertTrue(folded1[:, 1].folded)
Exemplo n.º 15
0
 def test_masked_folding(self):
     """
     Test that masked entries stay masked, and mask the entries they fold
     onto, when a spectrum is folded.
     """
     data = numpy.zeros((5, 6))
     fs = dadi.Spectrum(data)
     # This folds to an entry that will already be masked.
     fs.mask[1, 2] = True
     # This folds to (1,1), which needs to be masked.
     fs.mask[3, 4] = True
     ff = fs.fold()
     # Ensure that all those are masked.
     # (assertTrue replaces assert_, an alias removed in Python 3.12.)
     for entry in [(1, 2), (3, 4), (1, 1)]:
         self.assertTrue(ff.mask[entry])
Exemplo n.º 16
0
    def test_to_file(self):
        """
        Saving spectrum to file.

        Writes a random 3x3 spectrum with comment lines twice, once with
        default options and once with foldmaskinfo=False, removing the file
        after each write.
        """
        filename = 'test.fs'
        comments = ['comment 1', 'comment 2']
        fs = dadi.Spectrum(numpy.random.rand(3, 3))

        for extra_kwargs in ({}, {'foldmaskinfo': False}):
            fs.to_file(filename, comment_lines=comments, **extra_kwargs)
            os.remove(filename)
Exemplo n.º 17
0
def _from_phi_1D_direct(phi, n, xx, mask_corners=True, het_ascertained=None):
    """
    Compute a 1D sample spectrum from the population frequency
    distribution phi.

    n: Sample size; rounded with round() before use.
    xx: One-dimensional grid on which phi is defined.
    mask_corners: Passed through to dadi.Spectrum.
    het_ascertained: If 'xx', each binomial weight is additionally
                     multiplied by xx * (1 - xx).

    Entry ii of the result is the trapz integral over xx of
    comb(n, ii) * xx**ii * (1 - xx)**(n - ii) * phi.
    """
    n = round(n)
    use_het_weight = het_ascertained == 'xx'

    def integrand(ii):
        # Binomial sampling weight for ii copies among n, times phi.
        weight = scipy.special.comb(n, ii) * xx**ii * (1 - xx)**(n - ii)
        if use_het_weight:
            weight *= xx * (1 - xx)
        return weight * phi

    data = np.zeros(n + 1)
    for ii in range(n + 1):
        data[ii] = trapz(integrand(ii), xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)
Exemplo n.º 18
0
def _fold(spectrum):
    """
    Fold a square 2D spectrum by adding it to its transpose.

    The input is first wrapped as a dadi.Spectrum. If entry [1, 2] is
    already masked, the spectrum is taken to be folded already: a message
    is printed and it is returned unchanged. Otherwise the spectrum and its
    transpose are summed, the diagonal (which was counted twice) is halved,
    and the first row, first column and, for each row ii, columns from
    min(ii + 1, len - 1 - ii) onward are masked.
    """
    spectrum = dadi.Spectrum(spectrum)
    if spectrum.mask[1, 2] == True:
        # Call form of print: valid on Python 3, same output on Python 2.
        print("error: trying to fold a spectrum that is already folded")
        return spectrum

    spectrum = spectrum + np.transpose(spectrum)
    size = len(spectrum)
    for ii in range(size):
        spectrum[ii, ii] /= 2
    spectrum.mask[0, :] = True
    spectrum.mask[:, 0] = True
    for ii in range(size):
        spectrum.mask[ii, ii + 1:] = True
        spectrum.mask[ii, size - 1 - ii:] = True
    return spectrum
Exemplo n.º 19
0
def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute the derivative of the sample spectrum with respect to phi.

    Entry ii is the trapz integral over the grid xx of the binomial weight
    comb(n, ii) * xx**ii * (1 - xx)**(n - ii), additionally multiplied by
    xx * (1 - xx) when het_ascertained == 'xx'. mask_corners is passed
    through to dadi.Spectrum.
    """
    n_entries = n + 1  # e.g. 20 samples give 21 entries, counts 0..20
    data = np.zeros(n_entries)
    het_weighted = het_ascertained == 'xx'
    for ii in range(n_entries):
        binom = scipy.special.comb(n, ii)
        weight = binom * xx**ii * (1 - xx)**(n - ii)
        if het_weighted:
            weight *= xx * (1 - xx)
        data[ii] = trapz(weight, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)
Exemplo n.º 20
0
    def test_ambiguous_folding(self):
        """
        Test folding when the minor allele is ambiguous.
        """
        data = numpy.zeros((4, 4))
        # Both these entries correspond to an allele seen in 3 of 6 samples.
        # So the minor allele is ambiguous. In this case, we average the two
        # possible assignments.
        data[0, 3] = 1
        data[3, 0] = 3
        fs = dadi.Spectrum(data)
        ff = fs.fold()

        correct = numpy.zeros((4, 4))
        correct[0, 3] = correct[3, 0] = 2
        # assertTrue replaces assert_, an alias removed in Python 3.12.
        self.assertTrue(numpy.allclose(correct, ff.data))
Exemplo n.º 21
0
def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute the derivative of the sample spectrum with respect to phi.

    n is rounded with round() first. Entry ii of the result is the trapz
    integral over the grid xx of comb(n, ii) * xx**ii * (1 - xx)**(n - ii),
    multiplied by xx * (1 - xx) when het_ascertained == 'xx'. mask_corners
    is passed through to dadi.Spectrum.
    """
    n = round(n)
    # Optional extra weight; None means no ascertainment weighting.
    if het_ascertained == 'xx':
        het_weight = xx * (1 - xx)
    else:
        het_weight = None

    data = np.zeros(n + 1)
    for count in range(n + 1):
        weight = scipy.special.comb(n, count) * xx**count * (1 - xx)**(n - count)
        if het_weight is not None:
            weight *= het_weight
        data[count] = trapz(weight, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)
Exemplo n.º 22
0
    def test_folding(self):
        """
        Folding a 2D spectrum.
        """
        data = numpy.reshape(numpy.arange(12), (3, 4))
        fs = dadi.Spectrum(data)
        ff = fs.fold()

        # Ensure no SNPs have gotten lost.
        self.assertAlmostEqual(fs.sum(), ff.sum(), 6)
        self.assertAlmostEqual(fs.data.sum(), ff.data.sum(), 6)
        # Ensure that the empty entries are actually empty.
        # (assertTrue replaces assert_, an alias removed in Python 3.12.)
        self.assertTrue(
            numpy.all(ff.data[::-1] == numpy.tril(ff.data[::-1])))

        # This turns out to be the correct result.
        correct = numpy.tri(4)[::-1][-3:] * 11
        self.assertTrue(numpy.allclose(correct, ff.data))
Exemplo n.º 23
0
    def test_projection(self):
        """
        Projecting spectra down to smaller sample sizes.

        Covers data conservation, the equilibrium 1/i shape, propagation of
        masked entries in 1D and 2D, and commutation with folding.
        assertTrue replaces assert_, an alias removed in Python 3.12.
        """
        # Test that projecting a multi-dimensional Spectrum succeeds
        ns = (7, 8, 6)
        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        p = fs.project([3, 4, 5])
        # Also that we don't lose any data
        self.assertAlmostEqual(fs.data.sum(), p.data.sum())

        # Check that when I project an equilibrium spectrum, I get back an
        # equilibrium spectrum
        fs = dadi.Spectrum(1. / numpy.arange(100))
        p = fs.project([17])
        self.assertTrue(
            numpy.allclose(p[1:-1], 1. / numpy.arange(1, len(p) - 1)))

        # Check that masked values are propagated correctly.
        fs = dadi.Spectrum(1. / numpy.arange(20))
        # All values with 3 or fewer observed should be masked.
        fs.mask[3] = True
        p = fs.project([10])
        self.assertTrue(numpy.all(p.mask[:4]))

        # Check that masked values are propagated correctly.
        fs = dadi.Spectrum(1. / numpy.arange(20))
        fs.mask[-3] = True
        # The last three entries should be masked.
        p = fs.project([10])
        self.assertTrue(numpy.all(p.mask[-3:]))

        # A more complicated two dimensional projection problem...
        fs = dadi.Spectrum(numpy.random.uniform(size=(9, 7)))
        fs.mask[2, 3] = True
        p = fs.project([4, 4])
        self.assertTrue(numpy.all(p.mask[:3, 1:4]))

        # Test that projecting a folded multi-dimensional Spectrum succeeds
        # Should get the same result if I fold then project as if I project
        # then fold.
        ns = (7, 8, 6)
        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        fs.mask[2, 3, 1] = True
        folded = fs.fold()

        p = fs.project([3, 4, 5])
        pf1 = p.fold()
        pf2 = folded.project([3, 4, 5])

        # Check equality
        self.assertTrue(numpy.all(pf1.mask == pf2.mask))
        self.assertTrue(numpy.allclose(pf1.data, pf2.data))
Exemplo n.º 24
0
def msprime_to_dadi_simulation(path, seed, org, chrom, sample_size=20):
    '''
    Simulate a two-population joint SFS with msprime and save it for dadi.

    `org` has the form "<species>_<Model>": the part before the last
    underscore names an attribute of stdpopsim (for example homo_sapiens)
    and the part after it names a model class on that attribute, which is
    instantiated with no arguments. `chrom` is a key into that species'
    genome.chromosomes. `sample_size` samples are drawn from each of
    populations 0 and 1, and the joint SFS of the alternate-allele counts
    is written to `path` in dadi format.
    '''
    org_parts = org.split('_')
    species_name = '_'.join(org_parts[:-1])
    model_name = org_parts[-1]

    chrom = getattr(stdpopsim, species_name).genome.chromosomes[chrom]
    model = getattr(getattr(stdpopsim, species_name), model_name)()

    samples_pops_joint = []
    for population in (0, 1):
        samples_pops_joint.extend(
            [msprime.Sample(population=population, time=0)] * sample_size)

    ts_pops_joint = msprime.simulate(
        samples=samples_pops_joint,
        recombination_map=chrom.recombination_map(),
        mutation_rate=chrom.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    haplotypes = np.array(ts_pops_joint.genotype_matrix())

    def allele_counts(pop_haplotypes):
        # Pair haplotypes into diploid genotypes, then count alleles per site.
        genotypes = allel.HaplotypeArray(pop_haplotypes).to_genotypes(
            ploidy=2)
        return genotypes.count_alleles()

    # The first sample_size columns are population 0, the rest population 1.
    counts_pop0 = allele_counts(haplotypes[:, :sample_size])
    counts_pop1 = allele_counts(haplotypes[:, sample_size:])

    joint = allel.joint_sfs(counts_pop0[:, 1], counts_pop1[:, 1])
    dadi.Spectrum(joint).to_file(path)
Exemplo n.º 25
0
def _from_phi_1D_direct_dphi_analytical(n,
                                        xx,
                                        dfactor,
                                        mask_corners=True,
                                        het_ascertained=None):
    """
    Experimental analytical variant of the sample-spectrum derivative.

    NOTE(review): the bare string statement right after this docstring says
    "test failed", and as written the loop never stores the binomial
    weights it computes. Treat this function as unfinished.

    Arguments:
        n: Sample size; rounded with round() before use.
        xx: One-dimensional grid used in the binomial weights.
        dfactor: Sequence whose first differences scale each output entry.
        mask_corners: Passed through to dadi.Spectrum.
        het_ascertained: If 'xx', the weights are multiplied by
            xx * (1 - xx).

    Returns:
        dadi.Spectrum built from an array of n entries.
    """
    """ test failed """
    n = round(n)
    delta_dfactor = np.diff(dfactor)
    # Second difference of the grid; only the commented-out lines below
    # refer to it.
    double_delta_xx = np.diff(xx, 2)
    data = np.zeros(n)
    for ii in range(0, n):
        # Binomial weight; computed here but not used afterwards.
        factorx = scipy.special.comb(n, ii) * xx**ii * (1 - xx)**(n - ii)
        if het_ascertained == 'xx':
            factorx *= xx * (1 - xx)
        # data[ii] = trapz(factorx, double_delta_xx/2)
        # data[ii] *= double_delta_xx/2
        # data[ii] is still the initial 0 at this point.
        # NOTE(review): delta_dfactor is the array returned by np.diff, so
        # storing this product into a single element presumably fails
        # unless it has length 1 -- confirm.
        data[ii] *= delta_dfactor / 2
    return dadi.Spectrum(data, mask_corners=mask_corners)
Exemplo n.º 26
0
def alt_mut_mech_sample_spectrum(ns):
    """
    Sample spectrum for the alternate mutation mechanism, in which
    mutations are inserted at [1,1].

    It turns out that changing population size does not affect the
    distribution of mutations entering the population this way. We
    implement the exact solution of Jenkins et al (2014). This is for the
    neutral spectrum only; for a selected spectrum, integrate with lam = 1.

    ns - number of sampled individuals from the population

    Returns a dadi.Spectrum of shape (ns + 1, ns + 1) in which the first
    row, the first column and every entry [ii, jj] with ii + jj >= ns are
    masked.
    """
    fs = np.zeros((ns + 1, ns + 1))
    for ii in range(1, ns):
        # jj < ns - ii is the same condition as ii + jj < ns.
        for jj in range(1, ns - ii):
            # total equals ns - na, where na = ns - ii - jj.
            total = ii + jj
            # Float literal keeps this a true division on Python 2 as well.
            fs[ii, jj] = 2. * ns / (ns - 2) / ((total - 1) * total *
                                               (total + 1))
    fs = dadi.Spectrum(fs)
    # Write to fs.mask directly. Assigning .mask on a temporary slice
    # (fs[:, 0].mask = True) only reaches fs when the slice shares its mask
    # with the parent, which depends on the numpy version.
    fs.mask[:, 0] = True
    fs.mask[0, :] = True
    for ii in range(len(fs)):
        fs.mask[ii, ns - ii:] = True
    return fs
Exemplo n.º 27
0
def sfs_from_binomial(mutdf,
                      sub,
                      cutoff=1,
                      samples=10000,
                      maxd=2000,
                      mind=0,
                      mode='MyAnn',
                      germ=False):
    """
    Build a 1D dadi.Spectrum from per-mutation draws over `samples`.

    Rows of `mutdf` are kept when Depth lies strictly between `mind` and
    `maxd`, column `mode` equals `sub`, PredFreq exceeds 1e-6, and
    SampleFreq is below `cutoff` (germ=False) or at least `cutoff`
    (germ=True). get_binom is applied to each kept PredFreq with
    samples=samples, the results are rounded with np.around, and entry i of
    the spectrum is how many rows produced the value i (0 if none), for
    i in 0..samples.
    """
    if germ:
        freq_ok = mutdf.SampleFreq >= cutoff
    else:
        freq_ok = mutdf.SampleFreq < cutoff
    depth_ok = (mutdf.Depth > mind) & (mutdf.Depth < maxd)
    keep = (freq_ok & depth_ok & (mutdf[mode] == sub) &
            (mutdf.PredFreq > 1e-6))

    draws = mutdf[keep].PredFreq.apply(get_binom, samples=samples)
    sfvc = draws.apply(np.around).value_counts()

    afs = []
    for i in range(samples + 1):
        afs.append(sfvc[i] if i in sfvc.index else 0)
    return dadi.Spectrum(afs)
Exemplo n.º 28
0
def dadi_to_fsc_sfs(sfs_files, dadi_out_path, fsc_out_path, sample_size=20):
    """
    Sum several dadi joint SFS files and re-emit the total in fsc format.

    Every file in `sfs_files` is read with dadi.Spectrum.from_file and
    added to a zero spectrum of shape (sample_size + 1, sample_size + 1).
    The sum is written to `dadi_out_path`; the second line of that file
    (the data) is then cut into rows of sample_size + 1 values and written
    to `fsc_out_path` as a table with "d0_<k>" column headers and
    "d1_<k>" row headers.

    NB: FSC joint format file names look like <prefix>_jointMAFpop1_0.obs,
    where the first pop specified is listed in the rows and the second pop
    specified is listed in the columns.
    """
    n_bins = sample_size + 1

    ## get data sfs files and make them into one joint sfs and save
    joint = dadi.Spectrum([[0] * n_bins] * n_bins)
    for path in sfs_files:
        joint += dadi.Spectrum.from_file(path)
    joint.to_file(dadi_out_path)

    ## convert dadi 2D sfs to FSC 2D sfs
    with open(fsc_out_path, 'w') as obs:
        obs.write("1 observation\n")
        ## Column headers (d0_0 d0_1 .. d0_n), preceded by an empty cell
        column_labels = ["d0_" + str(col) for col in range(n_bins)]
        obs.write("\t" + "\t".join(column_labels) + "\n")

        ## Row headers
        row_labels = ["d1_" + str(row) for row in range(n_bins)]

        with open(dadi_out_path) as dadi_file:
            ## The second line of the dadi-style sfs contains the data
            values = dadi_file.readlines()[1].split()
            ## Slice the data into evenly sized chunks, one per row
            rows = [
                values[start:start + n_bins]
                for start in range(0, len(values), n_bins)
            ]
            ## Write out each row to the file
            for idx, label in enumerate(row_labels):
                obs.write(label + "\t" + " ".join(rows[idx]) + "\n")
Exemplo n.º 29
0
# Log-likelihood and optimal theta scaling of the current model, written as
# a two-line (header, values) tab-separated file.
dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs )
optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs) # 
header='\t'.join(str(x) for x in ("dadiLL","AnnabelLL","NancTheta","dadiOptimalTheta"))
output='\t'.join(str(x) for x in (dadi_ll_msmc_model,multinom_LL_AB,scalingTheta,optimalthetaFromDadi))
outputFile.write(('{0}\n{1}\n').format(header,output))
outputFile.close()
########## plot an image: ############
#import pylab
import matplotlib.pyplot as plt 
#fig=plt.figure(1)
#pylab.ion()
outputFigure=str(str(outdir)+"/"+str(modelName)+".expSFS.DadiScaling.figure.png")
dadi.Plotting.plot_1d_comp_multinom(model, fs)
# Use the `plt` alias imported just above; that import does not bind the
# bare name `pyplot`.
plt.title(modelName)
plt.savefig(outputFigure)

######### this is currently a crappy way to do this; improve if going deeper down this path #########
############## pulling from grid search, this is the best-fit sfs (relative to theta =1 ) from dadi with T = 35 for AL ##########
modelName="bestFitDadiModel.T35.fromGridSearch"
model = dadi.Spectrum([0, 0.78349086, 0.51350478 ,0.38226656, 0.30918362 ,0.26505468, 0.23705724 ,0.21899399, 0.20767940,
0.20143653, 0.09971952,0,0,0,0,0,0,0,0,0,0]).fold() # this is from R in my grid search for CA ; is w/in 1 pt of MLE with T = 35 gen
model_freq_fold = model/sum(model)

# Compute everything first, then write inside a context manager so the
# file is closed even if the write fails.
multinom_LL_AB= LhoodCalc(model_freq_fold,fs,ns/2)
dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs )
optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs) # 
header='\t'.join(str(x) for x in ("dadiLL","AnnabelLL","NancTheta","dadiOptimalTheta"))
output='\t'.join(str(x) for x in (dadi_ll_msmc_model,multinom_LL_AB,scalingTheta,optimalthetaFromDadi))
with open(str(outdir)+"/"+str(modelName)+".LLs.andOptimalTheta.txt","w") as outputFile:
    outputFile.write(('{0}\n{1}\n').format(header,output))
Exemplo n.º 30
0
import pickle
import pylab


#############################################################################
# Call form of print: valid on Python 3, same output on Python 2.
print("Load SFS data")

# File layout as read below: line index 1 is the sample size, line index 3
# the synonymous folded SFS, line index 5 the non-synonymous folded SFS
# (space-separated counts starting at singletons).
with open("Lyrata_SFS.txt", "r") as f1:
    raw1 = f1.readlines()
sampleSize1 = int(raw1[1])

# Each spectrum is padded to sampleSize + 1 entries: a leading 0. for the
# zero class and trailing zeros for the classes a folded SFS does not use.
# Only that first entry and the padded tail are masked.
sfs_NS1 = numpy.array(raw1[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS1)
sfs_NS1 = [0.] + sfs_NS1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
sfs_NS1_noMask = dadi.Spectrum(sfs_NS1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))  

sfs_S1 = numpy.array(raw1[3][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_S1)
sfs_S1 = [0.] + sfs_S1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
sfs_S1_noMask = dadi.Spectrum(sfs_S1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))  

with open("Thaliana_SFS.txt", "r") as f2:
    raw2 = f2.readlines()
sampleSize2 = int(raw2[1])

sfs_NS2 = numpy.array(raw2[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS2)
sfs_NS2 = [0.] + sfs_NS2 + numpy.zeros(shape=(sampleSize2-lensfs,)).tolist()
sfs_NS2_noMask = dadi.Spectrum(sfs_NS2, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize2-lensfs))  # singletons are NOT masked here: only the zero class and the padded tail are