def getSFSfromSLiM(inp):
    """Count fixations and build a 20-sample SFS from a block of text.

    `inp` is split on newlines; every non-empty line that does not start
    with 'm' or 'g' is split on single spaces and its columns 6 and 7
    (0-based) are read as integers. Presumably this is SLiM mutation
    output -- confirm the column meaning against the SLiM version in use.

    Returns:
        (None, None) when `inp` is empty, otherwise a tuple
        (fixations, sfs) where `sfs` is whatever
        SFS.SFS_from_all_frequencies(polymorphism_counts, 20) returns.
    """
    if not len(inp):
        return None, None

    n_fixed = 0
    poly_counts = []
    for line in inp.split('\n'):
        # Blank lines and lines starting with 'm' or 'g' carry no record.
        if not line or line[0] in ('m', 'g'):
            continue

        fields = line.split(' ')
        col7 = int(fields[7])
        col6 = int(fields[6])

        if col7 > col6:
            # Treated as a fixation, but only counted past generation 10000.
            if col6 > 10000:  # Assuming Ne = 1000, burn-in is 10,000
                n_fixed += 1
        elif col7 < col6:
            # Treated as a segregating site; column 7 is the value collected.
            poly_counts.append(col7)
        # Equal values are ignored.

    return n_fixed, SFS.SFS_from_all_frequencies(poly_counts, 20)
Пример #2
0
def orgPolyDict(organ_mutations, N):
    """Build a nested dict of site frequency spectra.

    For every key of `organ_mutations`, the associated records are grouped
    by their element 1, collecting int(record[7]) for each record. Each
    group's list is then passed to
    sfs_tools.SFS_from_all_frequencies(values, N).

    Returns:
        {outer_key: {record[1]: sfs}}; an outer key with no records maps
        to an empty dict.
    """
    sfsDict = {}
    for label, records in organ_mutations.items():
        grouped = {}
        for rec in records:
            grouped.setdefault(rec[1], []).append(int(rec[7]))

        sfsDict[label] = {
            group: sfs_tools.SFS_from_all_frequencies(values, N)
            for group, values in grouped.items()
        }
    return sfsDict
Пример #3
0
def get_sfs_dict_from_sample(slim_input):
    """Summarise one gzipped SLiM output file into a per-element dict.

    The file at `slim_input` is read through gzip, each line is stripped,
    and the lines are handed to ts.slim(..., fixed=True, give_genomes=True).
    Twenty genome keys are then drawn with random.choice (i.e. with
    replacement), mutation occurrences are tallied across the drawn
    genomes, and an SFS is built for each (organ, mutation type) pair via
    sfs_tools.SFS_from_all_frequencies(counts, 20). The result is combined
    with fixed-mutation data (threshold int(x.N) * 20) and element lengths
    by orgFixDict / combinePolyFix / combineElements.

    Returns:
        [x.name, elDict] on success, or [None, None] when the parsed
        object's `sanity` attribute is falsy.
    """
    # Use a context manager so the gzip handle is closed even on error;
    # the original left it open.
    with gzip.open(slim_input) as handle:
        data = [line.strip() for line in handle]

    x = ts.slim(data, fixed=True, give_genomes=True)
    if not x.sanity:
        return [None, None]

    genomes = x.genome_dict()
    # Result is unused here; the call is kept in case mutations_dict() has
    # side effects on `x` -- TODO confirm and drop if it is a pure getter.
    x.mutations_dict()
    lengthDict = parseLengths(x.organ_lengths())

    # list(...) keeps this working where dict.keys() is a non-indexable
    # view; the draw sequence is unchanged.
    genome_keys = list(genomes)
    individuals = [random.choice(genome_keys) for _ in range(20)]
    muts_by_organ = x.organ_mutations()

    # Number of sampled genomes carrying each mutation.
    new_muts = Counter()
    for g in individuals:
        for m in genomes[g]:
            new_muts[m] += 1

    polyDict = {}
    for organ, organ_muts in muts_by_organ.items():
        counts_by_type = {}
        for m in organ_muts:
            count = new_muts[m[0]]
            if count == 0:
                # Mutation absent from the sample.
                continue
            counts_by_type.setdefault(m[1], []).append(count)
        polyDict[organ] = {
            mtype: sfs_tools.SFS_from_all_frequencies(counts, 20)
            for mtype, counts in counts_by_type.items()
        }

    fixedDict = x.organ_fixed(threshold=int(x.N) * 20)

    fixD = orgFixDict(fixedDict)
    polyfix = combinePolyFix(polyDict, fixD)
    elDict = combineElements(polyfix, lengthDict)

    # Parenthesised form prints the same text under Python 2 and 3.
    print('processed ' + x.name)
    return [x.name, elDict]
Пример #4
0
def get_sfs_dict_from_sample(slim_input):
    """Summarise one SLiM output sample into a per-element dict.

    `slim_input` is passed directly to
    ts.slim(..., fixed=True, give_genomes=True). Twenty genome keys are
    drawn with random.choice (i.e. with replacement), mutation occurrences
    are tallied across the drawn genomes, and an SFS is built for each
    (organ, mutation type) pair via
    sfs_tools.SFS_from_all_frequencies(counts, 20). The result is combined
    with fixed-mutation data (threshold int(x.N) * 10) and element lengths
    by orgFixDict / combinePolyFix / combineElements.

    Returns:
        [x.name, elDict] on success, or [None, None] when the parsed
        object's `sanity` attribute is falsy.
    """
    x = ts.slim(slim_input, fixed=True, give_genomes=True)
    if not x.sanity:
        return [None, None]

    genomes = x.genome_dict()
    # Result is unused here; the call is kept in case mutations_dict() has
    # side effects on `x` -- TODO confirm and drop if it is a pure getter.
    x.mutations_dict()
    lengthDict = parseLengths(x.organ_lengths())

    # list(...) keeps this working where dict.keys() is a non-indexable
    # view; the draw sequence is unchanged.
    genome_keys = list(genomes)
    individuals = [random.choice(genome_keys) for _ in range(20)]
    muts_by_organ = x.organ_mutations()

    # Number of sampled genomes carrying each mutation.
    new_muts = Counter()
    for g in individuals:
        for m in genomes[g]:
            new_muts[m] += 1

    polyDict = {}
    for organ, organ_muts in muts_by_organ.items():
        counts_by_type = {}
        for m in organ_muts:
            count = new_muts[m[0]]
            if count == 0:
                # Mutation absent from the sample.
                continue
            counts_by_type.setdefault(m[1], []).append(count)
        polyDict[organ] = {
            mtype: sfs_tools.SFS_from_all_frequencies(counts, 20)
            for mtype, counts in counts_by_type.items()
        }

    # Single source of truth for the threshold; the original also computed
    # an unused, un-cast `thresh = x.N * 10`.
    threshold = int(x.N) * 10
    fixedDict = x.organ_fixed(threshold=threshold)
    fixD = orgFixDict(fixedDict)
    polyfix = combinePolyFix(polyDict, fixD)
    elDict = combineElements(polyfix, lengthDict)

    # Parenthesised form prints the same text under Python 2 and 3.
    print('processed ' + x.name)
    return [x.name, elDict]
def _parse_allele_counts(field):
    """Return the comma-separated integers in `field`, or None if it starts with '.'."""
    parts = field.split(',')
    if parts[0] == '.':
        return None
    # A real list (not a lazy map object) so it can be consumed repeatedly.
    return [int(p) for p in parts]


def sfsFromFreq(chunk, minCoverage=0):
    """Build all-site and non-CpG SFSs plus divergence tallies from records.

    Each item of `chunk` is a whitespace-separated record. Columns used
    (0-based):
        2        integer compared against `minCoverage`
        3        '.' or a float; records with a value below 0.0002 are
                 skipped (the original comments describe this as an HWE
                 check on variant sites)
        4, 6, 8  CpG flags for the "cast", "fam" and "rat" columns; a
                 record counts as non-CpG when none of them equals '1'
        5, 7, 9  comma-separated integer allele counts for "cast", "fam"
                 and "rat"; a leading '.' skips the record

    Args:
        chunk: iterable of record strings.
        minCoverage: records with column 2 <= this value are skipped.

    Returns:
        (allSitesSFS, ncpgSitesSFS, divList) where the SFSs come from
        site_frequency_spectrum.SFS_from_all_frequencies(values, 20) and
        divList is [fam_div_all, rat_div_all, fam_div_ncpg, rat_div_ncpg].
    """
    sfs_all = []
    sfs_ncpg = []
    rat_div_all = 0
    fam_div_all = 0
    rat_div_ncpg = 0
    fam_div_ncpg = 0

    for record in chunk:
        freq = record.strip().split()
        if int(freq[2]) <= minCoverage:
            continue

        # '.' means the site is not a variant, so there is no HWE value to
        # test; otherwise drop sites whose value falls below the cutoff.
        if freq[3] != '.' and float(freq[3]) < 0.0002:
            continue

        cast_alleles = _parse_allele_counts(freq[5])
        if cast_alleles is None:
            continue
        fam_alleles = _parse_allele_counts(freq[7])
        if fam_alleles is None:
            continue
        rat_alleles = _parse_allele_counts(freq[9])
        if rat_alleles is None:
            continue

        alleleFreq = getAlleleFreq(cast_alleles)
        sfs_all.append(alleleFreq)

        cast_cpg = freq[4]
        fam_cpg = freq[6]
        rat_cpg = freq[8]

        # Computed once per record and reused for the non-CpG tally; the
        # original repeated both calls with identical arguments.
        rat_div = divergent(cast_alleles, rat_alleles, out_alleles=1)
        fam_div = divergent(cast_alleles, fam_alleles, out_alleles=2)
        if rat_div:
            rat_div_all += rat_div
        if fam_div:
            fam_div_all += fam_div

        if '1' not in [cast_cpg, fam_cpg, rat_cpg]:
            sfs_ncpg.append(alleleFreq)
            if rat_div:
                rat_div_ncpg += rat_div
            if fam_div:
                fam_div_ncpg += fam_div

    divList = [fam_div_all, rat_div_all, fam_div_ncpg, rat_div_ncpg]
    allSitesSFS = site_frequency_spectrum.SFS_from_all_frequencies(sfs_all, 20)
    ncpgSitesSFS = site_frequency_spectrum.SFS_from_all_frequencies(
        sfs_ncpg, 20)

    return allSitesSFS, ncpgSitesSFS, divList