Exemplos de Spectrum em Python, exemplos de dadi.Spectrum em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_from_file(self):
        """
        Round-trip a Spectrum through to_file and from_file.

        Three cases are exercised: the current file format with comment
        lines, the old format written with foldmaskinfo=False, and a folded
        spectrum carrying an extra masked entry.
        """
        commentsin = ['comment 1', 'comment 2']
        filename = 'test.fs'
        data = numpy.random.rand(3, 3)

        fsin = dadi.Spectrum(data)
        fsin.to_file(filename, comment_lines=commentsin)

        # Read the file.
        fsout, commentsout = dadi.Spectrum.from_file(filename,
                                                     return_comments=True)
        os.remove(filename)
        # Ensure that fs was read correctly.
        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(numpy.allclose(fsout.data, fsin.data))
        self.assertTrue(numpy.all(fsout.mask == fsin.mask))
        self.assertEqual(fsout.folded, fsin.folded)
        # Ensure comments were read correctly.
        for ii, line in enumerate(commentsin):
            self.assertEqual(line, commentsout[ii])

        # Test using old file format
        fsin.to_file(filename, comment_lines=commentsin, foldmaskinfo=False)

        # Read the file.
        fsout, commentsout = dadi.Spectrum.from_file(filename,
                                                     return_comments=True)
        os.remove(filename)
        # Ensure that fs was read correctly.
        self.assertTrue(numpy.allclose(fsout.data, fsin.data))
        self.assertTrue(numpy.all(fsout.mask == fsin.mask))
        self.assertEqual(fsout.folded, fsin.folded)
        # Ensure comments were read correctly.
        for ii, line in enumerate(commentsin):
            self.assertEqual(line, commentsout[ii])

        #
        # Now test a file with folding and masking
        #
        fsin = dadi.Spectrum(data).fold()
        fsin.mask[0, 1] = True
        fsin.to_file(filename)

        fsout = dadi.Spectrum.from_file(filename)
        os.remove(filename)

        # Ensure that fs was read correctly.
        self.assertTrue(numpy.allclose(fsout.data, fsin.data))
        self.assertTrue(numpy.all(fsout.mask == fsin.mask))
        self.assertEqual(fsout.folded, fsin.folded)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Numerics.py Projeto: crougeux/Dadi_v1.6.3_modif

def intersect_masks(m1, m2):
    """
    Give m1 and m2 a common mask covering every entry masked in either one.

    When neither input is a masked array, both are returned untouched.
    Otherwise each is wrapped in a dadi.Spectrum that carries its own copy
    of the combined mask.
    """
    import dadi
    either_masked = numpy.ma.isMaskedArray(m1) or numpy.ma.isMaskedArray(m2)
    if not either_masked:
        return m1, m2

    mask1 = numpy.ma.getmask(m1)
    mask2 = numpy.ma.getmask(m2)
    combined = numpy.ma.mask_or(mask1, mask2)
    # Separate copies so the two results do not share one mask array.
    return (dadi.Spectrum(m1, mask=combined.copy()),
            dadi.Spectrum(m2, mask=combined.copy()))

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ts2fs.py Projeto: popsim-consortium/analysis2

def _generate_dadi_fs(neu_fs, nonneu_fs, output):
    """
    Description:
        Writes the neutral and non-neutral frequency spectra as dadi files.

    Arguments:
        neu_fs numpy.ndarray: Frequency spectrum for neutral mutations.
        nonneu_fs numpy.ndarray: Frequency spectrum for non-neutral mutations.
        output list: Names of output files; output[0] receives the neutral
            spectrum and output[1] the non-neutral one.
    """
    spectra = (dadi.Spectrum(neu_fs), dadi.Spectrum(nonneu_fs))
    for position, spectrum in enumerate(spectra):
        spectrum.to_file(output[position])

Exemplo n.º 4

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_unfolding(self):
        """
        Fold and then unfold a spectrum that carries extra masked entries.
        """
        shape = (3, 4)
        last_row = shape[0] - 1
        last_col = shape[1] - 1

        # Start from a spectrum with some unusual masking.
        original = dadi.Spectrum(numpy.random.uniform(size=shape))
        original.mask[0, 1] = True
        original.mask[1, 1] = True

        refolded = original.fold()
        restored = refolded.unfold()

        # The result must be flagged as unfolded.
        self.assertFalse(restored.folded)

        # No data may be lost along the way.
        total = original.data.sum()
        self.assertAlmostEqual(total, refolded.data.sum())
        self.assertAlmostEqual(original.data.sum(), restored.data.sum())

        # original.sum() need not equal refolded.sum() when the original had
        # masked values, so compare the folded and unfolded sums instead.
        self.assertAlmostEqual(refolded.sum(), restored.sum())

        # Each masked entry and its mirror image must be masked.
        self.assertTrue(restored.mask[0, 1])
        self.assertTrue(restored.mask[last_row, last_col - 1])
        self.assertTrue(restored.mask[1, 1])
        self.assertTrue(restored.mask[last_row - 1, last_col - 1])

Exemplo n.º 5

0

Exibir arquivo

Arquivo: easySFS.py Projeto: daxiw/HPGAP_HPC

def dadi_multiSFS(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Build the joint SFS over all populations and write it in dadi and
    fastsimcoal2 formats.

    dd: data dictionary handed to dadi.Spectrum.from_data_dict.
    pops: population names; joined with "-" to name the dadi file.
    proj: projection sizes passed to from_data_dict and written to the
        fastsimcoal2 header.
    unfold: passed as ``polarized`` to from_data_dict.
    outdir: output root; files go under its "dadi" and "fastsimcoal2"
        subdirectories.
    prefix: file-name prefix of the fastsimcoal2 file.
    dtype: when equal to "int", bin values are rounded to integers.

    Returns the path of the dadi file.
    """
    print("Doing multiSFS for all pops")
    dadi_multi_filename = os.path.join(outdir, "dadi",
                                       "-".join(pops) + ".sfs")

    ## Get the multiSFS
    fs = dadi.Spectrum.from_data_dict(dd, pops, proj, polarized=unfold)

    ## Integer bins instead of floats, keeping folding and mask
    if dtype == "int":
        rounded = np.rint(np.array(fs.data))
        fs = dadi.Spectrum(rounded,
                           data_folded=fs.folded,
                           mask=fs.mask,
                           fill_value=0,
                           dtype=int)

    ## Write out the dadi file
    fs.to_file(dadi_multi_filename)

    ## Convert to fsc multiSFS format
    fsc_multi_filename = os.path.join(outdir, "fastsimcoal2",
                                      prefix + "_MSFS.obs")
    sizes_line = "{}\t{}\n".format(len(pops),
                                   " ".join(str(size) for size in proj))
    with open(fsc_multi_filename, 'w') as outfile:
        outfile.write(
            "1 observations. No. of demes and sample sizes are on next line.\n"
        )
        outfile.write(sizes_line)
        ## The second line of the dadi file is copied over as the data row
        with open(dadi_multi_filename) as infile:
            data_line = infile.readlines()[1]
            outfile.write(data_line)
            outfile.write("\n")
    return dadi_multi_filename

Exemplo n.º 6

0

Exibir arquivo

Arquivo: easySFS.py Projeto: daxiw/HPGAP_HPC

def dadi_oneD_sfs_per_pop(dd, pops, proj, unfold, outdir, prefix, dtype):
    """
    Write one 1D SFS per population in dadi and fastsimcoal2 formats.

    dd: data dictionary handed to dadi.Spectrum.from_data_dict.
    pops: population names; proj[i] is the projection size for pops[i].
    unfold: passed as ``polarized``; also selects "D" (true) or "M" (false)
        in the fastsimcoal2 file name.
    outdir: output root; files go under its "dadi" and "fastsimcoal2"
        subdirectories.
    prefix: accepted but not used by this function.
    dtype: when equal to "int", bin values are rounded to integers.
    """
    dadi_dir = os.path.join(outdir, "dadi")
    fsc_dir = os.path.join(outdir, "fastsimcoal2")
    if unfold:
        M_or_D = "D"
    else:
        M_or_D = "M"

    for i, pop in enumerate(pops):
        print("Doing 1D sfs - {}".format(pop))
        n_proj = proj[i]
        dadi_sfs_file = os.path.join(dadi_dir,
                                     pop + "-{}.sfs".format(n_proj))

        fs = dadi.Spectrum.from_data_dict(dd, [pop], [n_proj],
                                          mask_corners=True,
                                          polarized=unfold)

        ## Integer bins instead of floats, keeping folding and mask
        if dtype == "int":
            rounded = np.rint(np.array(fs.data))
            fs = dadi.Spectrum(rounded,
                               data_folded=fs.folded,
                               mask=fs.mask,
                               fill_value=0,
                               dtype=int)

        fs.to_file(dadi_sfs_file)

        ## Convert each 1D sfs to fsc format
        fsc_oneD_filename = os.path.join(fsc_dir,
                                         pop + "_" + M_or_D + "AFpop0.obs")
        column_names = ["d0_" + str(x) for x in range(n_proj + 1)]
        with open(fsc_oneD_filename, 'w') as outfile:
            outfile.write("1 observation\n")
            outfile.write("\t".join(column_names) + "\n")
            ## The second line of the dadi file is copied over as the data
            with open(dadi_sfs_file) as infile:
                data_line = infile.readlines()[1]
                outfile.write(data_line)
                outfile.write("\n")

Exemplo n.º 7

0

Exibir arquivo

Arquivo: dadi_utils.py Projeto: ckyriazis/analysis

def compare_msprime_dadi_OutOfAfrica(input_fids, output_path, sample_size=20):
    """
    Plot a dadi Out-of-Africa model spectrum against summed spectra read
    from files, and save the figure.

    input_fids: paths of dadi spectrum files; their spectra are summed.
    output_path: where the comparison figure is saved.
    sample_size: the accumulator spectrum is (sample_size + 1) squared.

    NOTE(review): the model sample sizes are fixed at 20 per population and
    do not follow ``sample_size`` -- confirm other values are never used.
    """
    # Parameter order, for reference:
    # p0 = [nuAf, nuB, nuEu0, nuEu, nuAs0, nuAs, mAfB, mAfEu, mAfAs, mEuAs, TAf, TB, TEuAs]
    OoA_popt = [
        1.68, 0.287, 0.129, 3.74, 0.070, 7.29, 3.65, 0.44, 0.28, 1.40, 0.607,
        0.396, 0.058
    ]
    OoA_pts_l = [30, 40, 50]
    OoA_ns = [20, 20, 20]
    OoA_extrap_func = dadi.Numerics.make_extrap_func(OoA_func)
    three_pop_model = OoA_extrap_func(OoA_popt, OoA_ns, OoA_pts_l)
    # Drop the population at index 2, leaving a two-population spectrum.
    OoA_model = three_pop_model.marginalize([2])

    n_bins = sample_size + 1
    msprime_joint_sfs = dadi.Spectrum([[0] * n_bins] * n_bins)
    for fid in input_fids:
        msprime_joint_sfs += dadi.Spectrum.from_file(fid)

    fig = plt.figure(219033)
    fig.clear()
    dadi.Plotting.plot_2d_comp_multinom(OoA_model,
                                        msprime_joint_sfs,
                                        vmin=1,
                                        resid_range=50,
                                        show=False)
    fig.savefig(output_path)

Exemplo n.º 8

0

Exibir arquivo

    def test_1d_ic(self):
        """
        The extrapolated standard neutral model should be close to 1/i.
        """
        # This is just the standard neutral model
        func_ex = dadi.Numerics.make_extrap_log_func(dadi.Demographics1D.snm)
        fs = func_ex([], (17, ), [100, 120, 140])

        answer = dadi.Spectrum(1. / numpy.arange(18))

        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-3))

Exemplo n.º 9

0

Exibir arquivo

def parse_fold_sfs(sfs, sampleSize = 0, fold=True, maskSingletons=False):
    """
    Wrap a list of SFS entries in a dadi.Spectrum with a matching mask.

    A masked zero bin is placed in front of ``sfs``. With ``fold`` true the
    input must already be a folded SFS and is padded with masked zeros up to
    ``sampleSize + 1`` bins. With ``fold`` false one masked zero bin is
    appended and ``sampleSize`` must equal ``len(sfs) + 1``.

    sfs: sequence of bin values, excluding the zero bin.
    sampleSize: sample size used for padding (folded) or validation.
    fold: whether ``sfs`` is a folded spectrum.
    maskSingletons: also mask the first entry of ``sfs``.

    Returns a dadi.Spectrum. For backward compatibility, inconsistent
    arguments are reported by returning an error-message string instead of
    raising.
    """
    sfs = list(sfs)
    n_entries = len(sfs)

    # Bin 0 is always masked; the singleton bin is masked too on request.
    n_leading = 2 if maskSingletons else 1
    mask = [True] * n_leading + [False] * (n_entries + 1 - n_leading)

    if fold:
        addZeroLength = sampleSize - n_entries
        if addZeroLength < 1:
            return "Wrong use of parse_fold_sfs function!"
        data = [0.] + sfs + [0.] * addZeroLength
        mask += [True] * addZeroLength
        return dadi.Spectrum(data, data_folded=True, mask=mask)

    if sampleSize != n_entries + 1:
        return "sampleSize not equal 1+length(sfs)!"
    data = [0.] + sfs + [0.]
    # The appended last bin needs its own mask entry; without it the mask is
    # one element shorter than the data and the masked array is rejected.
    mask += [True]
    return dadi.Spectrum(data, data_folded=False, mask=mask)

Exemplo n.º 10

0

Exibir arquivo

    def test_1d_stationary(self):
        """
        A two-epoch model left to equilibrate should be close to nu/i.
        """
        func_ex = dadi.Numerics.\
                make_extrap_log_func(dadi.Demographics1D.two_epoch)
        # We let a two-epoch model equilibrate for tau=10, which should
        # eliminate almost all traces of the size change.
        fs = func_ex((0.5, 10), (17, ), [40, 50, 60])
        answer = dadi.Spectrum(0.5 / numpy.arange(18))

        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(numpy.ma.allclose(fs, answer, atol=1e-2))

Exemplo n.º 11

0

Exibir arquivo

Arquivo: dadi_utils.py Projeto: ckyriazis/analysis

def ts_to_dadi_sfs(ts_path,
                   out_path,
                   out_path_nonvariant,
                   sample_size=20,
                   mask_file=None):
    '''
    Build a two-population joint SFS from a saved tree sequence and write it
    in dadi format, with and without the count of nonvariant sites.

    ts_path: path of the tree sequence file. When mask_file is given, the
        part of the base name before the first "." is used as the
        chromosome name.
    out_path: output file for the joint SFS.
    out_path_nonvariant: output file for the same SFS with entry [0, 0] set
        to the number of nonvariant sites.
    sample_size: the first sample_size haplotype columns form population 0,
        the remaining columns population 1.
    mask_file: optional tab-separated file without header whose columns are
        chromosome, start, end. Sites falling inside an interval of the
        matching chromosome are dropped and the interval lengths are
        subtracted from the sequence length.
    '''
    ts = tskit.load(ts_path)
    haps = ts.genotype_matrix()
    total_length = ts.sequence_length

    # One flag per site, because the genotype matrix has one row per site.
    # True marks a site inside a masked interval.
    masked = np.full(ts.num_sites, False)
    if mask_file:
        mask_table = pd.read_csv(mask_file, sep="\t", header=None)
        chrom = ts_path.split("/")[-1].split(".")[0]
        sub = mask_table[mask_table[0] == chrom]
        mask_ints = pd.IntervalIndex.from_arrays(sub[1], sub[2])
        snp_locs = [int(x.site.position) for x in ts.variants()]
        # IntervalIndex.contains gives one flag per interval in recent
        # pandas and a single bool in old releases; np.any handles both.
        masked = np.array([np.any(mask_ints.contains(x)) for x in snp_locs],
                          dtype=bool)
        total_length -= np.sum(mask_ints.length)

    retain = np.logical_not(masked)
    haps_pops_joint = np.array(haps[retain, :])

    # Break up the haplotypes into separate populations based on sample_size
    haps_pop0_joint = haps_pops_joint[:, :sample_size]
    haps_pop1_joint = haps_pops_joint[:, sample_size:]

    genotypes_pop0_joint = allel.HaplotypeArray(haps_pop0_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop0_joint = genotypes_pop0_joint.count_alleles()
    genotypes_pop1_joint = allel.HaplotypeArray(haps_pop1_joint).to_genotypes(
        ploidy=2)
    allele_counts_pop1_joint = genotypes_pop1_joint.count_alleles()

    sfs_joint = allel.joint_sfs(allele_counts_pop0_joint[:, 1],
                                allele_counts_pop1_joint[:, 1])
    num_sites = sfs_joint.sum()
    sfs_joint = dadi.Spectrum(sfs_joint)
    sfs_joint.to_file(out_path)
    # The [0, 0] entry holds the number of nonvariant sites.
    sfs_joint[0, 0] = total_length - num_sites
    sfs_joint.to_file(out_path_nonvariant)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_marginalize(self):
        """
        Marginalizing over one population, for unfolded and folded spectra.
        """
        ns = (7, 8, 6)

        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        folded = fs.fold()

        marg1 = fs.marginalize([1])
        # Do manual marginalization.
        manual = dadi.Spectrum(fs.data.sum(axis=1))

        # Check that these are equal in the unmasked entries.
        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(
            numpy.allclose(numpy.where(marg1.mask, 0, marg1.data),
                           numpy.where(manual.mask, 0, manual.data)))

        # Check folded Spectrum objects. I should get the same result if I
        # marginalize then fold, as if I fold then marginalize.
        mf1 = marg1.fold()
        mf2 = folded.marginalize([1])
        self.assertTrue(numpy.allclose(mf1, mf2))

Exemplo n.º 13

0

Exibir arquivo

    def marginalB(self):
        """
        Marginal 1D frequency spectrum for B locus.

        Each slice self[fAB, :, faB] is summed and added to bin fAB + faB of
        the result; the extrapolation attributes are carried over.
        """
        n_samples = self.shape[0] - 1
        marginal = dadi.Spectrum(np.zeros(n_samples + 1))
        for count_AB in range(n_samples):
            remaining = n_samples - count_AB
            for count_aB in range(remaining):
                b_count = count_AB + count_aB
                marginal[b_count] += self[count_AB, :, count_aB].sum()

        marginal.extrap_x = self.extrap_x
        marginal.extrap_t = self.extrap_t
        return marginal

Exemplo n.º 14

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_folded_slices(self):
        """
        Slices of a Spectrum keep the folded flag of their parent.
        """
        ns = (3, 4)
        fs1 = dadi.Spectrum(numpy.random.rand(*ns))
        folded1 = fs1.fold()

        # assertTrue/assertFalse are used because the assert_ alias no
        # longer exists in recent unittest versions.
        self.assertFalse(fs1[:].folded)
        self.assertTrue(folded1[:].folded)

        self.assertFalse(fs1[0].folded)
        self.assertTrue(folded1[1].folded)

        self.assertFalse(fs1[:, 0].folded)
        self.assertTrue(folded1[:, 1].folded)

Exemplo n.º 15

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

 def test_masked_folding(self):
     """
     Test that folding keeps and propagates masked entries.
     """
     data = numpy.zeros((5, 6))
     fs = dadi.Spectrum(data)
     # This folds to an entry that will already be masked.
     fs.mask[1, 2] = True
     # This folds to (1,1), which needs to be masked.
     fs.mask[3, 4] = True
     ff = fs.fold()
     # Ensure that all those are masked.
     # assertTrue is used because the assert_ alias no longer exists in
     # recent unittest versions.
     for entry in [(1, 2), (3, 4), (1, 1)]:
         self.assertTrue(ff.mask[entry])

Exemplo n.º 16

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_to_file(self):
        """
        Write a spectrum to disk in both the current and the old format.
        """
        comments = ['comment 1', 'comment 2']
        filename = 'test.fs'
        data = numpy.random.rand(3, 3)
        spectrum = dadi.Spectrum(data)

        # Default format first, then the old one without fold/mask info.
        for extra_options in ({}, {'foldmaskinfo': False}):
            spectrum.to_file(filename, comment_lines=comments,
                             **extra_options)
            os.remove(filename)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: asm_analytic1D.py Projeto: ctlab/dadi_adjoint_state

def _from_phi_1D_direct(phi, n, xx, mask_corners=True, het_ascertained=None):
    """
    Compute a 1D sample spectrum from the population frequency
    distribution phi.

    phi: values multiplied into each integrand before integrating over xx.
    n: sample size; rounded before use.
    xx: one-dimensional grid used as the integration variable.
    mask_corners: passed through to dadi.Spectrum.
    het_ascertained: if 'xx', each integrand is additionally weighted by
        xx * (1 - xx).
    See from_phi for explanation of arguments.
    """
    n = round(n)
    data = np.zeros(n + 1)
    for derived in range(n + 1):
        ancestral = n - derived
        # Binomial sampling weight for `derived` copies out of n.
        integrand = (scipy.special.comb(n, derived) * xx**derived *
                     (1 - xx)**ancestral)
        if het_ascertained == 'xx':
            integrand *= xx * (1 - xx)
        data[derived] = trapz(integrand * phi, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: plotting.py Projeto: isadorafranca/popgen

def _fold(spectrum):
    """
    Fold a 2D spectrum by adding it to its transpose.

    The input is wrapped in a dadi.Spectrum. If its [1, 2] entry is already
    masked, it is treated as folded: a message is printed and the wrapped
    spectrum is returned unchanged. Otherwise the spectrum is added to its
    transpose, the diagonal is halved, and row 0, column 0 and, for each
    row ii, the columns from ii + 1 and from len - 1 - ii onward are
    masked.
    """
    spectrum = dadi.Spectrum(spectrum)
    if spectrum.mask[1, 2]:
        # Call form of print: valid on Python 2 and Python 3 alike.
        print("error: trying to fold a spectrum that is already folded")
        return spectrum

    spectrum = spectrum + np.transpose(spectrum)
    size = len(spectrum)
    # Diagonal entries were added to themselves, so halve them back.
    for ii in range(size):
        spectrum[ii, ii] /= 2
    spectrum.mask[0, :] = True
    spectrum.mask[:, 0] = True
    for ii in range(size):
        spectrum.mask[ii, ii + 1:] = True
        spectrum.mask[ii, size - 1 - ii:] = True
    return spectrum

Exemplo n.º 19

0

Exibir arquivo

def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute the derivative of the sample spectrum with respect to phi.

    The binomial sampling weight of every bin is integrated over xx
    without a phi factor.
    """
    # n samples give n + 1 bins, one for each count from 0 to n.
    data = np.zeros(n + 1)
    for derived in range(n + 1):
        ancestral = n - derived
        integrand = (scipy.special.comb(n, derived) * xx**derived *
                     (1 - xx)**ancestral)
        if het_ascertained == 'xx':
            integrand *= xx * (1 - xx)
        data[derived] = trapz(integrand, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_ambiguous_folding(self):
        """
        Test folding when the minor allele is ambiguous.
        """
        data = numpy.zeros((4, 4))
        # Both these entries correspond to an allele seen in 3 of 6 samples.
        # So the minor allele is ambiguous. In this case, we average the two
        # possible assignments.
        data[0, 3] = 1
        data[3, 0] = 3
        fs = dadi.Spectrum(data)
        ff = fs.fold()

        correct = numpy.zeros((4, 4))
        correct[0, 3] = correct[3, 0] = 2
        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(numpy.allclose(correct, ff.data))

Exemplo n.º 21

0

Exibir arquivo

Arquivo: asm_analytic1D.py Projeto: ctlab/dadi_adjoint_state

def _from_phi_1D_direct_dphi_directly(n,
                                      xx,
                                      mask_corners=True,
                                      het_ascertained=None):
    """
    Compute the derivative of the sample spectrum with respect to phi.

    The binomial sampling weight of every bin is integrated over xx
    without a phi factor; n is rounded before use.
    See from_phi for explanation of arguments.
    """
    n = round(n)
    data = np.zeros(n + 1)
    for derived in range(n + 1):
        ancestral = n - derived
        integrand = (scipy.special.comb(n, derived) * xx**derived *
                     (1 - xx)**ancestral)
        if het_ascertained == 'xx':
            integrand *= xx * (1 - xx)
        data[derived] = trapz(integrand, xx)
    return dadi.Spectrum(data, mask_corners=mask_corners)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_folding(self):
        """
        Folding a 2D spectrum.
        """
        data = numpy.reshape(numpy.arange(12), (3, 4))
        fs = dadi.Spectrum(data)
        ff = fs.fold()

        # Ensure no SNPs have gotten lost.
        self.assertAlmostEqual(fs.sum(), ff.sum(), 6)
        self.assertAlmostEqual(fs.data.sum(), ff.data.sum(), 6)
        # Ensure that the empty entries are actually empty.
        # assertTrue is used because the assert_ alias no longer exists in
        # recent unittest versions.
        self.assertTrue(numpy.all(ff.data[::-1] == numpy.tril(ff.data[::-1])))

        # This turns out to be the correct result.
        correct = numpy.tri(4)[::-1][-3:] * 11
        self.assertTrue(numpy.allclose(correct, ff.data))

Exemplo n.º 23

0

Exibir arquivo

Arquivo: test_Spectrum.py Projeto: crougeux/Dadi_v1.6.3_modif

    def test_projection(self):
        """
        Projecting spectra to smaller sample sizes.

        Checks that data is conserved, that an equilibrium spectrum stays
        an equilibrium spectrum, that masked entries propagate, and that
        folding and projecting give the same result in either order.
        """
        # Test that projecting a multi-dimensional Spectrum succeeds
        ns = (7, 8, 6)
        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        p = fs.project([3, 4, 5])
        # Also that we don't lose any data
        self.assertAlmostEqual(fs.data.sum(), p.data.sum())

        # Check that when I project an equilibrium spectrum, I get back an
        # equilibrium spectrum
        # assertTrue is used throughout because the assert_ alias no longer
        # exists in recent unittest versions.
        fs = dadi.Spectrum(1. / numpy.arange(100))
        p = fs.project([17])
        self.assertTrue(
            numpy.allclose(p[1:-1], 1. / numpy.arange(1, len(p) - 1)))

        # Check that masked values are propagated correctly.
        fs = dadi.Spectrum(1. / numpy.arange(20))
        # All values with 3 or fewer observed should be masked.
        fs.mask[3] = True
        p = fs.project([10])
        self.assertTrue(numpy.all(p.mask[:4]))

        # Check that masked values are propagated correctly.
        fs = dadi.Spectrum(1. / numpy.arange(20))
        fs.mask[-3] = True
        # The last three entries should be masked.
        p = fs.project([10])
        self.assertTrue(numpy.all(p.mask[-3:]))

        # A more complicated two dimensional projection problem...
        fs = dadi.Spectrum(numpy.random.uniform(size=(9, 7)))
        fs.mask[2, 3] = True
        p = fs.project([4, 4])
        self.assertTrue(numpy.all(p.mask[:3, 1:4]))

        # Test that projecting a folded multi-dimensional Spectrum succeeds
        # Should get the same result if I fold then project as if I project
        # then fold.
        ns = (7, 8, 6)
        fs = dadi.Spectrum(numpy.random.uniform(size=ns))
        fs.mask[2, 3, 1] = True
        folded = fs.fold()

        p = fs.project([3, 4, 5])
        pf1 = p.fold()
        pf2 = folded.project([3, 4, 5])

        # Check equality
        self.assertTrue(numpy.all(pf1.mask == pf2.mask))
        self.assertTrue(numpy.allclose(pf1.data, pf2.data))

Exemplo n.º 24

0

Exibir arquivo

def msprime_to_dadi_simulation(path, seed, org, chrom, sample_size=20):
    '''
    Simulate two populations with msprime and save their joint SFS in dadi
    format.

    path: output file for the spectrum.
    seed: random seed handed to msprime.simulate.
    org: "<species>_<model>"; the text before the last underscore names an
        attribute of stdpopsim, the text after it names the model class
        looked up on that attribute.
    chrom: key into the species' genome.chromosomes.
    sample_size: number of samples drawn from each of populations 0 and 1.
    '''
    species_name, _, model_name = org.rpartition('_')
    species = getattr(stdpopsim, species_name)
    chrom = species.genome.chromosomes[chrom]
    model = getattr(species, model_name)()

    # All population-0 samples first, then all population-1 samples.
    samples_pops_joint = [
        msprime.Sample(population=pop_index, time=0)
        for pop_index in (0, 1)
        for _ in range(sample_size)
    ]
    ts_pops_joint = msprime.simulate(
        samples=samples_pops_joint,
        recombination_map=chrom.recombination_map(),
        mutation_rate=chrom.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    haps_pops_joint = np.array(ts_pops_joint.genotype_matrix())

    # Split the haplotype columns into the two populations.
    per_population_counts = []
    for columns in (slice(None, sample_size), slice(sample_size, None)):
        haplotypes = allel.HaplotypeArray(haps_pops_joint[:, columns])
        genotypes = haplotypes.to_genotypes(ploidy=2)
        per_population_counts.append(genotypes.count_alleles())
    allele_counts_pop0_joint, allele_counts_pop1_joint = per_population_counts

    sfs_joint = allel.joint_sfs(allele_counts_pop0_joint[:, 1],
                                allele_counts_pop1_joint[:, 1])
    dadi.Spectrum(sfs_joint).to_file(path)

Exemplo n.º 25

0

Exibir arquivo

Arquivo: asm_analytic1D.py Projeto: ctlab/dadi_adjoint_state

def _from_phi_1D_direct_dphi_analytical(n,
                                        xx,
                                        dfactor,
                                        mask_corners=True,
                                        het_ascertained=None):
    """
    Experimental analytical variant of the sample-spectrum derivative.

    NOTE(review): the author marked this function as failing its test, and
    it looks unfinished. As written, ``data`` starts as n zeros and each
    entry is only multiplied in place, ``factorx`` is computed but never
    stored, and ``double_delta_xx`` is unused.

    n: sample size; rounded before use.
    xx: grid used to build the per-bin weight ``factorx``.
    dfactor: array whose first differences scale each entry of ``data``.
    mask_corners: passed through to dadi.Spectrum.
    het_ascertained: if 'xx', ``factorx`` is weighted by xx * (1 - xx).
    See from_phi for explanation of arguments.
    """
    """ test failed """
    n = round(n)
    delta_dfactor = np.diff(dfactor)
    # Second-order differences of the grid; only referenced by the
    # commented-out lines below.
    double_delta_xx = np.diff(xx, 2)
    # n entries here, whereas the sibling functions allocate n + 1.
    data = np.zeros(n)
    for ii in range(0, n):
        factorx = scipy.special.comb(n, ii) * xx**ii * (1 - xx)**(n - ii)
        if het_ascertained == 'xx':
            factorx *= xx * (1 - xx)
        # data[ii] = trapz(factorx, double_delta_xx/2)
        # data[ii] *= double_delta_xx/2
        # NOTE(review): data[ii] is a single element while delta_dfactor is
        # an array -- confirm the intended shapes.
        data[ii] *= delta_dfactor / 2
    return dadi.Spectrum(data, mask_corners=mask_corners)

Exemplo n.º 26

0

Exibir arquivo

def alt_mut_mech_sample_spectrum(ns):
    """
    Neutral sample spectrum for the alternate mutation mechanism, in which
    mutations are inserted at [1,1].

    Changing population size does not affect the distribution of mutations
    entering the population this way, so the exact solution of Jenkins et
    al (2014) is implemented. This covers the neutral spectrum only; for a
    selected spectrum, integrate as above with lam = 1.

    ns - number of sampled individuals from the population

    NOTE(review): ``2 * ns / (ns - 2)`` would be an integer division under
    Python 2 -- confirm the intended interpreter.
    """
    values = np.zeros((ns + 1, ns + 1))
    for ii in range(1, ns):
        for jj in range(1, ns):
            if ii + jj >= ns:
                continue
            na = ns - ii - jj
            denominator = (ns - na - 1) * (ns - na) * (ns - na + 1)
            values[ii, jj] = 2 * ns / (ns - 2) * 1. / denominator

    fs = dadi.Spectrum(values)
    # Mask the first column and first row, then every entry of row ii from
    # column ns - ii onward.
    fs[:, 0].mask = True
    fs[0, :].mask = True
    for row in range(len(fs)):
        fs.mask[row, ns - row:] = True
    return fs

Exemplo n.º 27

0

Exibir arquivo

def sfs_from_binomial(mutdf,
                      sub,
                      cutoff=1,
                      samples=10000,
                      maxd=2000,
                      mind=0,
                      mode='MyAnn',
                      germ=False):
    """
    Build a spectrum from values drawn by get_binom for each selected row.

    Rows of mutdf are kept when Depth lies strictly between mind and maxd,
    column ``mode`` equals ``sub`` and PredFreq exceeds 1e-6. With ``germ``
    false, SampleFreq must be below ``cutoff``; with ``germ`` true, it must
    be at least ``cutoff``. get_binom is applied to each kept PredFreq with
    ``samples``, the results are rounded and tallied, and the tallies for
    0..samples become the spectrum.
    """
    if germ:
        frequency_filter = mutdf.SampleFreq >= cutoff
    else:
        frequency_filter = mutdf.SampleFreq < cutoff

    selected = (frequency_filter & (mutdf.Depth > mind) &
                (mutdf.Depth < maxd) & (mutdf[mode] == sub) &
                (mutdf.PredFreq > 1e-6))
    draws = mutdf[selected].PredFreq.apply(get_binom, samples=samples)
    sfvc = draws.apply(np.around).value_counts()

    afs = [sfvc[i] if i in sfvc.index else 0 for i in range(0, samples + 1)]
    return dadi.Spectrum(afs)

Exemplo n.º 28

0

Exibir arquivo

def dadi_to_fsc_sfs(sfs_files, dadi_out_path, fsc_out_path, sample_size=20):
    """
    Sum several dadi 2D spectra, save the total in dadi format and convert
    it to a fastsimcoal2 joint SFS file.

    sfs_files: paths of dadi spectrum files to add together.
    dadi_out_path: output path for the summed dadi spectrum.
    fsc_out_path: output path for the fastsimcoal2 file.
    sample_size: each axis of the spectrum has sample_size + 1 bins.
    """
    row_size = sample_size + 1

    ## Add all input spectra into one joint spectrum and save it
    msprime_joint_sfs = dadi.Spectrum([[0] * row_size] * row_size)
    for fid in sfs_files:
        msprime_joint_sfs += dadi.Spectrum.from_file(fid)
    msprime_joint_sfs.to_file(dadi_out_path)

    ## convert dadi 2D sfs to FSC 2D sfs
    ## NB: FSC joint format file names look like this: <prefix>_jointMAFpop1_0.obs
    ## Where the first pop specified is listed in the rows and the second pop
    ## specified is listed in the columns.
    ## Column headers are d0_0 .. d0_n, row headers d1_0 .. d1_n.
    column_headers = ["d0_" + str(x) for x in range(row_size)]
    row_headers = ["d1_" + str(x) for x in range(row_size)]

    with open(fsc_out_path, 'w') as outfile:
        outfile.write("1 observation\n")
        outfile.write("\t" + "\t".join(column_headers) + "\n")

        with open(dadi_out_path) as infile:
            ## The second line of the dadi-style sfs contains the data
            row_data = infile.readlines()[1].split()
            ## Cut the flat data into chunks of one row each
            rows = []
            for start in range(0, len(row_data), row_size):
                rows.append(row_data[start:start + row_size])
            ## Write out each row to the file
            for index, row_head in enumerate(row_headers):
                outfile.write(row_head + "\t" + " ".join(rows[index]) + "\n")

Exemplo n.º 29

0

Exibir arquivo

# Write dadi's multinomial log-likelihood and optimal theta for the current
# model next to the values computed earlier in the script.
dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs)
optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs)
header = '\t'.join(str(x) for x in ("dadiLL", "AnnabelLL", "NancTheta", "dadiOptimalTheta"))
output = '\t'.join(str(x) for x in (dadi_ll_msmc_model, multinom_LL_AB, scalingTheta, optimalthetaFromDadi))
outputFile.write(('{0}\n{1}\n').format(header, output))
outputFile.close()
########## plot an image: ############
import matplotlib.pyplot as plt
outputFigure = str(str(outdir) + "/" + str(modelName) + ".expSFS.DadiScaling.figure.png")
dadi.Plotting.plot_1d_comp_multinom(model, fs)
# Use the plt alias imported above; `pyplot` is not bound by that import.
plt.title(modelName)
plt.savefig(outputFigure)

######### this is currently a crappy way to do this; improve if going deeper down this path #########
############## pulling from grid search, this is the best-fit sfs (relative to theta =1 ) from dadi with T = 35 for AL ##########
modelName = "bestFitDadiModel.T35.fromGridSearch"
# this is from R in my grid search for CA ; is w/in 1 pt of MLE with T = 35 gen
model = dadi.Spectrum([0, 0.78349086, 0.51350478, 0.38226656, 0.30918362, 0.26505468, 0.23705724, 0.21899399, 0.20767940,
0.20143653, 0.09971952, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).fold()
model_freq_fold = model/sum(model)

# The with-block closes the results file even if a computation fails.
with open(str(outdir) + "/" + str(modelName) + ".LLs.andOptimalTheta.txt", "w") as outputFile:
    multinom_LL_AB = LhoodCalc(model_freq_fold, fs, ns/2)
    dadi_ll_msmc_model = dadi.Inference.ll_multinom(model, fs)
    optimalthetaFromDadi = dadi.Inference.optimal_sfs_scaling(model, fs)
    header = '\t'.join(str(x) for x in ("dadiLL", "AnnabelLL", "NancTheta", "dadiOptimalTheta"))
    output = '\t'.join(str(x) for x in (dadi_ll_msmc_model, multinom_LL_AB, scalingTheta, optimalthetaFromDadi))
    outputFile.write(('{0}\n{1}\n').format(header, output))

Exemplo n.º 30

0

Exibir arquivo

import pickle
import pylab


#############################################################################
# Call form of print: valid on Python 2 and Python 3 alike.
print("Load SFS data")

# Each input file is read whole: line index 1 holds the sample size, line
# index 3 is parsed into sfs_S1 and line index 5 into sfs_NS1 / sfs_NS2.
# Every spectrum gets a masked zero bin in front and is padded with masked
# zeros so that it has sampleSize + 1 bins.
with open("Lyrata_SFS.txt", "r") as f1:
    raw1 = f1.readlines()
sampleSize1 = int(raw1[1])

sfs_NS1 = numpy.array(raw1[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS1)
sfs_NS1 = [0.] + sfs_NS1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
sfs_NS1_noMask = dadi.Spectrum(sfs_NS1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))

sfs_S1 = numpy.array(raw1[3][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_S1)
sfs_S1 = [0.] + sfs_S1 + numpy.zeros(shape=(sampleSize1-lensfs,)).tolist()
sfs_S1_noMask = dadi.Spectrum(sfs_S1, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize1-lensfs))

with open("Thaliana_SFS.txt", "r") as f2:
    raw2 = f2.readlines()
sampleSize2 = int(raw2[1])

sfs_NS2 = numpy.array(raw2[5][:-1].split(" "), dtype='float64').tolist()
lensfs = len(sfs_NS2)
sfs_NS2 = [0.] + sfs_NS2 + numpy.zeros(shape=(sampleSize2-lensfs,)).tolist()
# Only the zero bin and the padding are masked here; singletons stay unmasked.
sfs_NS2_noMask = dadi.Spectrum(sfs_NS2, data_folded=True, mask = [True]*1 + [False]*(lensfs) + [True]*(sampleSize2-lensfs))