Пример #1
0
def make_files_for_graphing(file_prefix, window_sizes, step_size, working_dir,
                            genome_fai):
    """Write one per-window SNP count table for each requested window size.

    For every entry of ``window_sizes`` this calls
    ``homozygosity_mapping_cybertron.make_windows`` to obtain the window bed
    name, runs ``bedtools intersect -c`` of that window bed against
    ``<file_prefix>.bed`` into the scratch file ``temp.bed``, and then copies
    the result to ``<file_prefix>.<genome_and_window>.bed`` behind a header
    line, dropping a leading ``chr`` from each row.

    Args:
        file_prefix: Path stem; ``file_prefix + '.bed'`` is the ``-b`` input
            and also the start of every output file name.
        window_sizes: Iterable of window sizes, each forwarded to
            ``make_windows``.
        step_size: Forwarded unchanged to ``make_windows``.
        working_dir: Forwarded unchanged to ``make_windows``.
        genome_fai: Forwarded unchanged to ``make_windows``.

    Raises:
        subprocess.CalledProcessError: If bedtools exits with a non-zero
            status, so a failed run cannot produce a header-only table.
    """
    in_bed = file_prefix + '.bed'
    for ws in window_sizes:
        # Only the part of make_windows' return value before the first '.'
        # is kept; presumably that is "<genome>_<window info>" -- confirm
        # against make_windows.
        genome_and_window = homozygosity_mapping_cybertron.make_windows(
            working_dir, genome_fai, ws, step_size).split('.')[0]
        # Function-call form works on Python 2 and 3 alike for one argument.
        print(genome_and_window)
        window_bed = genome_and_window + '.bed'
        print(window_bed, in_bed)

        # bedtools intersect writes its rows into the scratch file temp.bed
        # in the current directory (overwritten on every iteration).
        intersect_cmd = [
            'bedtools', 'intersect', '-a', window_bed, '-b', in_bed, '-c'
        ]
        with open('temp.bed', "w") as naf_fh:
            return_code = subprocess.Popen(intersect_cmd,
                                           stdout=naf_fh).wait()
        if return_code != 0:
            raise subprocess.CalledProcessError(return_code, intersect_cmd)

        # Add the header and write the final table.
        out_bed = file_prefix + '.' + genome_and_window + '.bed'
        with open(out_bed, "w") as out_fh, open('temp.bed', "r") as in_fh:
            out_fh.write(
                delim.join(['chr', 'start', 'end', 'snp_number']) + '\n')
            for line in in_fh:
                # Drop the 'chr' prefix only when it is really there, so
                # names such as 'scaffold_1' are not truncated.
                if line.startswith('chr'):
                    line = line[3:]
                out_fh.write(line)
Пример #2
0
def make_files_for_graphing(snp_beds, window_size, step_size, genome_fai,
                            working_dir):
    """Write per-window average male/female AAF tables for plotting in R.

    For each combination of SNP bed file and window size, the window bed
    named by ``homozygosity_mapping_cybertron.make_windows`` is intersected
    with the SNP bed (``bedtools intersect -wa -wb``) into ``temp.bed``.
    Rows are grouped by their first three fields; the values at field index
    6 (male) and 7 (female) are averaged per group and written to two files
    next to the SNP bed:

    * ``<snp stem>.<genome_and_window>.aaf_for_r.txt``
    * ``<snp stem>.<genome_and_window>.combined_for_r.txt``

    Args:
        snp_beds: Iterable of SNP bed file paths.
        window_size: Iterable of window sizes passed to ``make_windows``.
        step_size: Passed unchanged to ``make_windows``.
        genome_fai: Passed unchanged to ``make_windows``.
        working_dir: Passed unchanged to ``make_windows``.
    """
    for snp_bed in snp_beds:
        for ws in window_size:
            # Only the text before the first '.' of the returned name is used.
            windows_name = homozygosity_mapping_cybertron.make_windows(
                working_dir, genome_fai, ws, step_size)
            genome_and_window = windows_name.split('.')[0]
            print(genome_and_window)
            window_bed = genome_and_window + '.bed'

            # Both outputs share the same stem.
            out_stem = snp_bed.rsplit('.', 1)[0] + '.' + genome_and_window
            out_file_naf = out_stem + '.aaf_for_r.txt'
            out_file_combined = out_stem + '.combined_for_r.txt'

            # bedtools intersect, captured in the scratch file temp.bed.
            intersect_cmd = [
                'bedtools', 'intersect', '-a', window_bed, '-b', snp_bed,
                '-wa', '-wb'
            ]
            with open('temp.bed', "w") as naf_fh:
                hom_bt_intersect = subprocess.Popen(intersect_cmd,
                                                    stdout=naf_fh)
                hom_bt_intersect.wait()

            # window key -> [male values, female values]
            aaf_dict = {}
            with open('temp.bed', "r") as in_fh:
                for record in in_fh:
                    fields = record.rstrip().split(delim)
                    window_key = delim.join(fields[:3])
                    male_value = float(fields[6])
                    female_value = float(fields[7])
                    male_values, female_values = aaf_dict.setdefault(
                        window_key, [[], []])
                    male_values.append(male_value)
                    female_values.append(female_value)

            # Write both tables from the grouped values.
            with open(out_file_naf, "w") as outn_fh, \
                    open(out_file_combined, "w") as outc_fh:
                outn_fh.write(
                    delim.join(['chr', 'start', 'end', 'test', 'average_aaf'])
                    + '\n')
                outc_fh.write(
                    delim.join(['chr', 'start', 'end', 'test', 'value']) +
                    '\n')

                for window, (male_values, female_values) in aaf_dict.items():
                    ave_maaf = sum(male_values) / len(male_values)
                    ave_faaf = sum(female_values) / len(female_values)
                    aaf_diff = ave_maaf - ave_faaf
                    snp_count = len(male_values)
                    log_snp_count = math.log(snp_count, 2)

                    naf_rows = [
                        ('male_aaf', ave_maaf),
                        ('female_aaf', ave_faaf),
                    ]
                    combined_rows = [
                        ('aaf_male', ave_maaf),
                        ('aaf_female', ave_faaf),
                        ('aaf_difference', aaf_diff),
                        ('log2_snp_count', log_snp_count),
                    ]
                    for label, value in naf_rows:
                        outn_fh.write(window + delim + label + delim +
                                      str(value) + '\n')
                    for label, value in combined_rows:
                        outc_fh.write(window + delim + label + delim +
                                      str(value) + '\n')
Пример #3
0
    'kenny_wgs_0620.chr17_20-40mb.ko_only.xls',
    'kenny_wgs_0620.chr17_20-60mb.xls',
    'kenny_wgs_0620.chr17_20-60mb.ko_only.xls', 'kenny_wgs_0620.chr17.xls',
    'kenny_wgs_0620.chr17.ko_only.xls', 'kenny_wgs_0620.chr17.ko_only.129.xls'
]
# Script section: convert each annotation file to a bed file, then intersect
# every bed file with the genome windows for each window size.
beds_to_graph = []
# '''
# Derive the bed name by swapping the last extension for '.bed', remember it,
# and have make_bed_from_ann_txt build that bed file from the source file.
for file_to_graph in files_to_graph:
    bed_to_graph = file_to_graph.rsplit('.', 1)[0] + '.bed'
    beds_to_graph.append(bed_to_graph)
    make_bed_from_ann_txt(file_to_graph, bed_to_graph)

for bed_to_graph in beds_to_graph:
    for ws in window_size:
        # make_windows is documented here as creating the window bed file;
        # only the text before the first '.' of its return value is kept
        # (presumably "<genome>_<window info>" -- confirm against make_windows)
        genome_and_window = homozygosity_mapping_cybertron.make_windows(
            working_dir, genome_fai, ws, step_size).split('.')[0]
        # NOTE(review): Python 2 print statement; a SyntaxError on Python 3
        print genome_and_window
        window_bed = genome_and_window + '.bed'
        # output name: <bed stem>.<genome_and_window>.bed
        out_bed = bed_to_graph.rsplit('.',
                                      1)[0] + '.' + genome_and_window + '.bed'
        # run bedtools intersect (-c) with stdout redirected to the scratch
        # file temp.bed; `bedtools` is a name defined elsewhere, not the
        # literal string used in the other snippets
        with open('temp.bed', "w") as naf_fh:
            hom_bt_intersect = subprocess.Popen([
                bedtools, 'intersect', '-a', window_bed, '-b', bed_to_graph,
                '-c'
            ],
                                                stdout=naf_fh)
            # block until bedtools has finished writing temp.bed
            hom_bt_intersect.wait()
        # filter for region of interest
        # NOTE(review): the body of this branch is not visible in this excerpt
        if '30mb' in bed_to_graph:
def make_files_for_graphing(snp_beds, window_size, step_size, genome_fai,
                            working_dir):
    """Write per-window average AAF tables for two experiments.

    For each combination of SNP bed file and window size, the window bed
    named by ``homozygosity_mapping_cybertron.make_windows`` is intersected
    with the SNP bed (``bedtools intersect -wa -wb``) into ``temp.bed``.
    Rows are grouped by their first three fields, and the values at field
    indices 6-9 (male 1, female 1, male 2, female 2) are averaged per group.
    Six tables are written next to the SNP bed, all prefixed with
    ``<snp stem>.<genome_and_window>``:

    * ``.experiment1.aaf_for_r.txt`` and ``.experiment2.aaf_for_r.txt``
    * ``.male.aaf_for_r.txt`` and ``.female.aaf_for_r.txt``
    * ``.combined_for_r.txt`` (averages, differences, log2 SNP count)
    * ``.aaf_for_all.txt`` (all four averages)

    Args:
        snp_beds: Iterable of SNP bed file paths.
        window_size: Iterable of window sizes passed to ``make_windows``.
        step_size: Passed unchanged to ``make_windows``.
        genome_fai: Passed unchanged to ``make_windows``.
        working_dir: Passed unchanged to ``make_windows``.
    """
    for snp_bed in snp_beds:
        for ws in window_size:
            # Only the text before the first '.' of the returned name is used.
            windows_name = homozygosity_mapping_cybertron.make_windows(
                working_dir, genome_fai, ws, step_size)
            genome_and_window = windows_name.split('.')[0]
            print(genome_and_window)
            window_bed = genome_and_window + '.bed'

            # Every output shares the same stem.
            out_stem = snp_bed.rsplit('.', 1)[0] + '.' + genome_and_window
            out_file_naf1 = out_stem + '.experiment1.aaf_for_r.txt'
            out_file_naf2 = out_stem + '.experiment2.aaf_for_r.txt'
            out_file_combined = out_stem + '.combined_for_r.txt'
            out_file_naf_male = out_stem + '.male.aaf_for_r.txt'
            out_file_naf_female = out_stem + '.female.aaf_for_r.txt'
            out_file_naf_all = out_stem + '.aaf_for_all.txt'

            # bedtools intersect, captured in the scratch file temp.bed.
            intersect_cmd = [
                'bedtools', 'intersect', '-a', window_bed, '-b', snp_bed,
                '-wa', '-wb'
            ]
            with open('temp.bed', "w") as naf_fh:
                hom_bt_intersect = subprocess.Popen(intersect_cmd,
                                                    stdout=naf_fh)
                hom_bt_intersect.wait()

            # window key -> [male 1, female 1, male 2, female 2] value lists
            aaf_dict = {}
            with open('temp.bed', "r") as in_fh:
                for record in in_fh:
                    fields = record.rstrip().split(delim)
                    window_key = delim.join(fields[:3])
                    values = [float(fields[col]) for col in (6, 7, 8, 9)]
                    series = aaf_dict.setdefault(window_key,
                                                 [[], [], [], []])
                    for bucket, value in zip(series, values):
                        bucket.append(value)

            # Write every table from the grouped values.
            with open(out_file_naf1, "w") as outn1_fh, \
                    open(out_file_naf2, "w") as outn2_fh, \
                    open(out_file_combined, "w") as outc_fh, \
                    open(out_file_naf_male, "w") as outm_fh, \
                    open(out_file_naf_female, "w") as outf_fh, \
                    open(out_file_naf_all, "w") as outall_fh:
                for header_fh in (outn1_fh, outn2_fh, outm_fh, outf_fh):
                    header_fh.write(
                        delim.join(
                            ['chr', 'start', 'end', 'test', 'average_aaf']) +
                        '\n')
                outc_fh.write(
                    delim.join(['chr', 'start', 'end', 'test', 'value']) +
                    '\n')
                outall_fh.write(
                    delim.join(['chr', 'start', 'end', 'test', 'average_aaf'])
                    + '\n')

                for window, series in aaf_dict.items():
                    ave_maaf1, ave_faaf1, ave_maaf2, ave_faaf2 = [
                        sum(bucket) / len(bucket) for bucket in series
                    ]
                    aaf_diff1 = ave_maaf1 - ave_faaf1
                    aaf_diff2 = ave_maaf2 - ave_faaf2
                    snp_count = len(series[0])
                    log_snp_count = math.log(snp_count, 2)

                    # (destination, label, value), in the order rows must
                    # appear within each destination file.
                    rows = [
                        # first graphing files - by experiment
                        (outn1_fh, 'male_aaf_1', ave_maaf1),
                        (outn1_fh, 'female_aaf_1', ave_faaf1),
                        (outn2_fh, 'male_aaf_2', ave_maaf2),
                        (outn2_fh, 'female_aaf_2', ave_faaf2),
                        # first graphing files - by sex
                        (outm_fh, 'male_aaf_1', ave_maaf1),
                        (outf_fh, 'female_aaf_1', ave_faaf1),
                        (outm_fh, 'male_aaf_2', ave_maaf2),
                        (outf_fh, 'female_aaf_2', ave_faaf2),
                        # second graphing file
                        (outc_fh, 'aaf_male_1', ave_maaf1),
                        (outc_fh, 'aaf_female_1', ave_faaf1),
                        (outc_fh, 'aaf_difference_1', aaf_diff1),
                        (outc_fh, 'aaf_male_2', ave_maaf2),
                        (outc_fh, 'aaf_female_2', ave_faaf2),
                        (outc_fh, 'aaf_difference_2', aaf_diff2),
                        (outc_fh, 'log2_snp_count', log_snp_count),
                        # third graphing - both experiments together
                        (outall_fh, 'aaf_male_1', ave_maaf1),
                        (outall_fh, 'aaf_female_1', ave_faaf1),
                        (outall_fh, 'aaf_male_2', ave_maaf2),
                        (outall_fh, 'aaf_female_2', ave_faaf2),
                    ]
                    for target_fh, label, value in rows:
                        target_fh.write(window + delim + label + delim +
                                        str(value) + '\n')