def summariseChunk(chunk, ncpg=False):

    # Parse the colon-delimited SFS and divergence strings from the chunk.
    all_SFS_list = [[int(v) for v in i.split(':')] for i in list(chunk[4])]
    ncpg_SFS_list = [[int(v) for v in i.split(':')] for i in list(chunk[5])]
    div_list = [[int(v) for v in i.split(':')] for i in list(chunk[6])]

    all_SFS = all_SFS_list[0]
    ncpg_SFS = ncpg_SFS_list[0]
    div = div_list[0]

    # Merge the remaining rows' spectra onto the first.
    for i in range(1, len(all_SFS_list)):
        all_SFS = SFS_tools.merge_SFS(all_SFS, all_SFS_list[i])
        ncpg_SFS = SFS_tools.merge_SFS(ncpg_SFS, ncpg_SFS_list[i])
        div = SFS_tools.merge_SFS(div, div_list[i])

    # Return either the non-CpG spectrum or the all-sites spectrum,
    # each with its slice of the divergence counts.
    if ncpg:
        return ncpg_SFS, div[2:]
    else:
        return all_SFS, div[:2]
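For context, a minimal sketch of how summariseChunk might be driven, assuming the chunks come from pandas.read_csv with chunksize set and the colon-delimited SFS strings sitting in columns 4-6 (the file name and column layout are assumptions, not shown in the snippet):

import pandas as pd

# Hypothetical driver: stream a headerless table and summarise each chunk.
reader = pd.read_csv('windows.tsv', sep='\t', header=None, chunksize=10000)
for chunk in reader:
    sfs, div = summariseChunk(chunk, ncpg=True)
    print(sfs, div)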
def get_sfs_dicts(file_name):
    temp_sfs_dict = {}
    out_sfs_dict = {"name": file_name}
    with open(file_name, "r") as x:
        for i in x:
            y = i.strip("\n").split("[")
            region = y[1].strip(", ").strip("''")
            # This doozy gives you the processed SFS from the file of SFS values.
            region_sfs = [int(v) for v in y[2].strip("]]").split(",")]
            if region not in temp_sfs_dict:
                temp_sfs_dict[region] = [region_sfs]
            else:
                temp_sfs_dict[region].append(region_sfs)

    # Collapse each region's list of spectra into a single merged SFS.
    for key, sfs_list in temp_sfs_dict.items():
        grand_sfs = sfs_list[0]
        for temp in sfs_list[1:]:
            grand_sfs = merge_SFS(grand_sfs, temp)
        out_sfs_dict[key] = grand_sfs

    return out_sfs_dict
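None of these examples ship merge_SFS itself. Judging by how it is called everywhere below, it plausibly sums two spectra bin by bin; a minimal sketch under that assumption (the zero-padding behaviour is a guess, not the library's documented contract):

def merge_SFS(sfs_a, sfs_b):
    # Assumed behaviour: element-wise sum, padding the shorter spectrum
    # with zeros. This is a sketch, not the project's actual function.
    n = max(len(sfs_a), len(sfs_b))
    a = list(sfs_a) + [0] * (n - len(sfs_a))
    b = list(sfs_b) + [0] * (n - len(sfs_b))
    return [x + y for x, y in zip(a, b)]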
Example No. 3
def combineElements(polyfix, lengths):
    elDict = {}
    for i in polyfix:
        sfs = []
        try:
            for j in polyfix[i]:
                if i == 'g1' and j == 'm12':  # synonymous class, for the case of exons
                    elDict['syn'] = polyfix[i][j]
                    continue
                if len(sfs) == 0:
                    sfs = polyfix[i][j]
                else:
                    sfs = sfs_tools.merge_SFS(sfs, polyfix[i][j])
        except KeyError:
            continue
        elDict[i] = sfs

    # Fill the zero-frequency class with the sites never seen as polymorphic.
    for i in elDict:
        zero = lengths[i] - sum(elDict[i])
        try:
            elDict[i][0] = zero
        except IndexError:
            return elDict
    return elDict
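A toy call, assuming polyfix maps SLiM genomic-element IDs to dicts of per-mutation-type spectra and lengths gives each element's total site count (this structure is inferred from the code above, not documented):

polyfix = {'g1': {'m12': [0, 5, 3, 1], 'm1': [0, 7, 2, 0]}}
lengths = {'g1': 1000, 'syn': 500}
print(combineElements(polyfix, lengths))
# {'syn': [491, 5, 3, 1], 'g1': [991, 7, 2, 0]}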
Example No. 4
def mergeTheSFS(SFS):
    # Fold a list of spectra into one merged SFS.
    if len(SFS) == 1:
        return SFS[0]
    sfs = SFS[0]
    for i in SFS[1:]:
        sfs = merge_SFS(sfs, i)
    return sfs

def mergeManySFS(spectra):
    # The same fold, written to accept any iterable of spectra.
    sfs = None
    for i in spectra:
        if sfs is None:
            sfs = i
        else:
            sfs = SFS.merge_SFS(sfs, i)
    return sfs
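Both helpers perform the same left fold. A quick check of the second one, substituting the merge_SFS sketch above for the unshown SFS module:

spectra = [[0, 4, 2], [0, 1, 1], [0, 2, 0]]
print(mergeManySFS(spectra))  # [0, 7, 3] under element-wise summation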
Example No. 6
def get_full_sfs(sfs_dict_raw):
    grand = {}
    for i in sfs_dict_raw:
        # Merge every non-empty spectrum recorded for this region.
        full_sfs = sfs_dict_raw[i][0]
        for s in sfs_dict_raw[i][1:]:
            if not s:
                continue
            full_sfs = SFS.merge_SFS(full_sfs, s)
        grand[i] = full_sfs

    return grand
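The input here is the same region-to-spectra mapping the parsing functions above produce; empty spectra are skipped. A toy call:

raw = {'u.101-111': [[0, 2, 1], [], [0, 1, 0]]}
print(get_full_sfs(raw))  # {'u.101-111': [0, 3, 1]} with element-wise merging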
Example No. 7
import argparse
import glob
import subprocess
# SFS and getSFSfromSLiM come from this project's own modules (not shown).

def main():
    parser = argparse.ArgumentParser(
        description="Extract the SFS from a bunch of merged SLiM output files")

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        dest="input",
        type=str,
        help="The name of the directory containing the SLiM output")

    parser.add_argument(
        "-o",
        "--output",
        required=True,
        dest="output",
        type=str,
        help="The name of the output file you want to write to")

    args = parser.parse_args()

    output = []
    for m in range(1, 9):
        print('m' + str(m))
        full_sfs = []
        for i in glob.glob(args.input + '/R*'):
            num = i.split('/')[-1].split('.')[2]

            # Pull this mutation type's lines out of the gzipped SLiM output.
            process = subprocess.Popen(['zgrep', 'm' + str(m), i],
                                       stdout=subprocess.PIPE,
                                       text=True).communicate()[0]
            fixations, sfs = getSFSfromSLiM(process)
            if fixations is None:
                continue
            polymorphs = sum(sfs)
            # Fold fixations into the top bin and zero the monomorphic class.
            sfs[-1] += fixations
            sfs[0] = 0
            print(SFS.pi(sfs) / 140000.)  # per-site pi over a hard-coded length
            print(':'.join(map(str, sfs)))
            if len(full_sfs) == 0:
                full_sfs = sfs
            else:
                full_sfs = SFS.merge_SFS(full_sfs, sfs)

        output.append(['m' + str(m), full_sfs])

    txt = open(args.output, 'w')
    for i in output:
        print(i)
        txt.write(i[0] + '\n')
        txt.write(' '.join(map(str, i[1])) + '\n')

    txt.close()
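A hypothetical invocation, assuming this script is saved as extract_slim_sfs.py (the name is invented) and the directory holds gzipped SLiM outputs matching R*:

python extract_slim_sfs.py -i slim_runs -o spectra.txt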
import argparse
import gzip

import pandas as pd

def main():
    parser = argparse.ArgumentParser(
        description="Combine all the SFS files coming out of the "
        "sfs_from_slim_update_bootstrap.py script")

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        dest="input",
        type=str,
        help="The name of the file that contains the sfs files")
    parser.add_argument("-o",
                        "--output",
                        required=True,
                        dest="output",
                        type=str,
                        help="The name of the output file")
    args = parser.parse_args()

    sfs_dict = {}

    # Read the gzipped input in text mode so each line arrives as a string.
    for i in gzip.open(args.input, 'rt'):
        z = i.split('[')
        region = z[1].strip("'").replace("'", '')
        sfs_temp = [
            int(v)
            for v in z[2].replace(']', '').replace(',', '').strip().split(' ')
        ]
        try:
            sfs_dict[region].append(sfs_temp)
        except KeyError:
            sfs_dict[region] = [sfs_temp]

    data = []

    for i in sfs_dict:
        # Merge all replicates for this window into one SFS.
        sfs = sfs_dict[i][0]
        for j in sfs_dict[i][1:]:
            sfs = SFS_tools.merge_SFS(sfs, j)
        # Window labels look like 'u.101-111': a stream prefix and a
        # start-end range; 'u' windows get negative distances.
        stream = i.split('.')[0]
        dist = [int(v) for v in i.replace(',', '').split('.')[1].split('-')]
        if stream == 'u':
            mult = -1
        else:
            mult = 1

        mid = mult * sum(dist) / 2  # midpoint of the window

        data.append([mid, SFS_tools.pi(sfs), SFS_tools.tajima(sfs)])

    pd.DataFrame(data, columns=['dist', 'pi', 'TD']).to_csv(args.output)
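SFS_tools.pi and SFS_tools.tajima are never defined in these snippets. Assuming the conventional definitions over an unfolded SFS whose index is the derived-allele count (bin 0 and the last bin are monomorphic), minimal sketches look like this; the real library may differ:

from math import sqrt

def pi_from_sfs(sfs):
    # Nucleotide diversity: sum of i*(n-i)*sfs[i] over all bins, divided
    # by the number of pairs, with n = len(sfs) - 1 sampled chromosomes.
    n = len(sfs) - 1
    pairs = n * (n - 1) / 2.0
    return sum(i * (n - i) * x for i, x in enumerate(sfs)) / pairs

def tajimas_d(sfs):
    # Tajima's D with the standard Tajima (1989) normalising constants.
    n = len(sfs) - 1
    S = sum(sfs[1:-1])  # segregating sites only
    if S == 0:
        return 0.0
    a1 = sum(1.0 / i for i in range(1, n))
    a2 = sum(1.0 / i ** 2 for i in range(1, n))
    b1 = (n + 1.0) / (3.0 * (n - 1))
    b2 = 2.0 * (n * n + n + 3) / (9.0 * n * (n - 1))
    c1 = b1 - 1.0 / a1
    c2 = b2 - (n + 2.0) / (a1 * n) + a2 / a1 ** 2
    e1 = c1 / a1
    e2 = c2 / (a1 ** 2 + a2)
    theta_w = S / a1
    return (pi_from_sfs(sfs) - theta_w) / sqrt(e1 * S + e2 * S * (S - 1))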
Example No. 9
def get_boot_sfs(sfs_dict_raw):
    boot = {}
    for i in sfs_dict_raw:
        # Resample this region's spectra with replacement, then merge them.
        sample = [
            sfs_dict_raw[i][j] for j in bootstrap_sample(len(sfs_dict_raw[i]))
        ]

        boot_sfs = sample[0]
        for s in sample[1:]:
            boot_sfs = SFS.merge_SFS(boot_sfs, s)
        boot[i] = boot_sfs

    return boot
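bootstrap_sample is also undefined here; the conventional implementation draws n indices with replacement:

import random

def bootstrap_sample(n):
    # One bootstrap replicate: n index draws with replacement.
    # A sketch of the unshown helper, assuming standard behaviour.
    return [random.randrange(n) for _ in range(n)]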
Example No. 10
import argparse
import gzip
import pickle
import random

def main():
    parser = argparse.ArgumentParser(
        description="Take the dict of SFS for different SLiM runs and make "
        "a composite SFS or bootstrapped SFS")
    parser.add_argument("-i", "--input",
                        required=True,
                        dest="input",
                        type=str,
                        help="Give the name of the gzipped pickle file")

    parser.add_argument("-o", "--output",
                        required=True,
                        dest="output",
                        type=str,
                        help="The name of the output")
    parser.add_argument("-b", "--boots",
                        required=False,
                        action='store_true',
                        help="Do you want a bootstrapped SFS?",
                        default=False)

    args = parser.parse_args()

    pickle_jar = pickle.load(gzip.open(args.input, "rb"))

    names = [p for p in pickle_jar.keys() if p]
    for i in names:
        print(i)
    if args.boots:
        # Resample the run names with replacement for one bootstrap replicate.
        boots = [names[random.randint(0, len(names) - 1)]
                 for i in range(len(names))]
        names = boots

    bigDict = {}
    for n in names:
        z = pickle_jar[n]
        for j in z:
            if not z[j]:
                continue
            if j not in bigDict:
                bigDict[j] = z[j]
            else:
                bigDict[j] = SFS_tools.merge_SFS(z[j], bigDict[j])

    output = open(args.output, 'w')
    for i in bigDict:
        output.write(i + '\n' + ' '.join(map(str, bigDict[i])) + '\n')
    output.close()
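A hypothetical invocation, with the script and file names invented for illustration; -b switches on the bootstrap resampling of run names:

python composite_sfs.py -i sfs_runs.pkl.gz -o composite_sfs.txt -b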
def combineMany(SFSs):

    # Each element of SFSs is a colon-delimited string of counts.
    sfs = [int(v) for v in SFSs[0].split(':')]
    for s in SFSs[1:]:
        sfs = SFS_tools.merge_SFS(sfs, [int(v) for v in s.split(':')])
    return sfs
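A quick example, again assuming the unshown SFS_tools.merge_SFS sums spectra element-wise:

spectra = ['0:4:2:1', '0:1:1:0']
print(combineMany(spectra))  # [0, 5, 3, 1]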