Пример #1
0
 def test_get_ptcs_in_window(self):
     """Compare get_ptcs_in_window output with the expected.txt fixture.

     The PTC rows and relative-position rows are read from tab-separated
     fixtures, their numeric columns are cast to int, and both are handed
     to the function as dicts keyed by row index.
     """
     ptc_file = "test_data/disease_snps_ops/test_get_ptcs_in_window/ptc_list.txt"
     relative_positions_file = "test_data/disease_snps_ops/test_get_ptcs_in_window/relative_positions.txt"
     ptc_rows = gen.read_many_fields(ptc_file, "\t")
     rel_pos_rows = gen.read_many_fields(relative_positions_file, "\t")

     ptcs_by_index = {}
     rel_pos_by_index = {}
     for index, ptc_row in enumerate(ptc_rows):
         # columns 1-3 of a PTC row hold integers
         for column in (1, 2, 3):
             ptc_row[column] = int(ptc_row[column])
         ptcs_by_index[index] = ptc_row
         rel_pos_row = rel_pos_rows[index]
         rel_pos_row[1] = int(rel_pos_row[1])
         rel_pos_by_index[index] = rel_pos_row

     expected_file = "test_data/disease_snps_ops/test_get_ptcs_in_window/expected.txt"
     expected_rows = gen.read_many_fields(expected_file, "\t")
     window_ends = [5, 3]
     expected = {end: {} for end in window_ends}
     for row in expected_rows:
         # column 9 is '.' or an end value; only rows for the wanted ends are kept
         if row[9] == '.' or int(row[9]) not in window_ends:
             continue
         kept = row[:9]
         for column in (1, 2, 3, 8):
             kept[column] = int(kept[column])
         # expected is keyed by column 9 first, then by column 10
         expected[int(row[9])][int(row[10])] = kept

     observed = get_ptcs_in_window(ptcs_by_index, rel_pos_by_index, 4, 69)
     self.assertEqual(observed, expected)
Пример #2
0
def get_unique_rel_pos(unique_ptcs, disease_snps_relative_exon_positions, kgenomes_ptcs_file, kgenomes_ptcs_exon_positions, unique_ptcs_rel_pos_file, kgenomes_ptcs_rel_pos_file):
    '''
    Write copies of the unique ptcs file and the kgenomes ptcs file in which
    column 11 of each row is replaced by the value looked up from the
    corresponding positions file.

    Each lookup maps int(column 7) to int(column 11) of a positions file.
    The first row of kgenomes_ptcs_exon_positions is skipped; the disease
    positions file is read in full.

    Note: the lookups are defaultdicts, so a position that is missing from
    a positions file produces an empty defaultdict instead of a KeyError.
    '''

    def write_with_rel_pos(rows, lookup, output_path):
        # overwrite column 11 with the looked-up value and write the row tab-joined
        with open(output_path, "w") as outfile:
            for row in rows:
                position = int(row[7])
                row[11] = lookup[position]
                outfile.write("{0}\n".format("\t".join(gen.stringify(row))))

    disease_lookup = collections.defaultdict(lambda: collections.defaultdict())
    for row in gen.read_many_fields(disease_snps_relative_exon_positions, "\t"):
        position = int(row[7])
        disease_lookup[position] = int(row[11])

    write_with_rel_pos(gen.read_many_fields(unique_ptcs, "\t"),
                       disease_lookup,
                       unique_ptcs_rel_pos_file)

    kgenomes_lookup = collections.defaultdict(lambda: collections.defaultdict())
    kgenomes_position_rows = gen.read_many_fields(kgenomes_ptcs_exon_positions, "\t")
    for row in kgenomes_position_rows[1:]:
        position = int(row[7])
        kgenomes_lookup[position] = int(row[11])

    write_with_rel_pos(gen.read_many_fields(kgenomes_ptcs_file, "\t"),
                       kgenomes_lookup,
                       kgenomes_ptcs_rel_pos_file)
Пример #3
0
def motif_codon_density(motif_file, output_directory):
    """
    Run ops.calc_codon_density_in_motifs over a list of codon sets and
    collect the results into one csv.

    The codon sets are read from {output_directory}/gc_matched_combinations.bed
    (generated from the stop codons when the file does not exist yet), with
    the stop codons themselves appended as a final set. The csv is written
    to {output_directory}/motif_densities/, named after the motif file.

    Args:
        motif_file (str): path to the motif file
        output_directory (str): directory holding the combinations file
            and receiving the motif_densities output
    """

    stop_codons = ["TAA", "TAG", "TGA"]
    matched_sets_path = "{0}/gc_matched_combinations.bed".format(output_directory)
    if not os.path.isfile(matched_sets_path):
        seqo.get_gc_matched_motifs(stop_codons, matched_sets_path)

    # scratch directory for the per-set outputs, removed at the end
    scratch_dir = "temp_motif_density"
    gen.create_output_directories(scratch_dir)

    codon_sets = gen.read_many_fields(matched_sets_path, "\t")
    codon_sets.append(["TAA", "TAG", "TGA"])

    result_files = simoc.run_simulation_function(
        codon_sets,
        [motif_file, scratch_dir],
        ops.calc_codon_density_in_motifs,
        sim_run=False)

    density_dir = "{0}/motif_densities".format(output_directory)
    gen.create_output_directories(density_dir)

    # name the csv after the motif file, without directory or extension
    motif_stem = motif_file.split("/")[-1].split(".")[0]
    csv_path = "{0}/{1}.csv".format(density_dir, motif_stem)
    with open(csv_path, "w") as outfile:
        outfile.write("id,motifs,density\n")
        for row_id, result_file in enumerate(sorted(result_files), start=1):
            # only the first line of each result file is used
            first_row = gen.read_many_fields(result_file, ",")[0]
            outfile.write("{0},{1},{2}\n".format(row_id, first_row[0], first_row[1]))

    gen.remove_directory(scratch_dir)
Пример #4
0
def get_passed_NONCODE_codes(input_fasta, codes_file, mapping_file,
                             output_fasta, code):
    """
    Write the fasta records whose gene carries the requested NONCODE code.

    Each sequence name is mapped to a gene through the mapping file, the
    gene is mapped to its code through the codes file, and the record is
    written only when that code equals `code`.

    Args:
        input_fasta (str): path to input fasta file
        codes_file (str): path to tab-separated file of gene and code
        mapping_file (str): path to tab-separated transcript-gene mapping
            file; the transcript name is cut at its first "."
        output_fasta (str): path to output fasta
        code (str): code to keep. Passed as a string so that values such
            as 0001 keep their leading zeros
    """

    gene_codes = {}
    for row in gen.read_many_fields(codes_file, "\t"):
        gene_codes[row[0]] = row[1]

    transcript_genes = {}
    for row in gen.read_many_fields(mapping_file, "\t"):
        transcript_genes[row[0].split(".")[0]] = row[1]

    names, seqs = gen.read_fasta(input_fasta)
    with open(output_fasta, "w") as outfile:
        for index, name in enumerate(names):
            if gene_codes[transcript_genes[name]] != code:
                continue
            outfile.write(">{0}\n{1}\n".format(name, seqs[index]))
Пример #5
0
def run_simulations(simulation_sets, required_simulations):
    '''
    For every entry of simulation_sets, generate simulated motif sets and
    write the stop codon count of the real set and of each simulated set.

    Each entry is indexed as (motif file, simulation output file,
    stop count output file). Both output files are removed before the
    simulation runs.
    '''

    for motif_path, simulated_sets_path, stop_counts_path in (
            (entry[0], entry[1], entry[2]) for entry in simulation_sets):

        # clear any previous simulation outputs
        gen.remove_file(simulated_sets_path)
        gen.remove_file(stop_counts_path)

        # first field of every row, skipping rows that start with "#"
        motifs = [
            row[0]
            for row in gen.read_many_fields(motif_path, ",")
            if row[0][0] != "#"
        ]

        # stop codon count of the real motif set
        real_count = se.get_stop_codon_count(motifs)

        # simulate from the real set, then read the simulated sets back
        print('Simulating {0}...'.format(motif_path))
        se.generate_motifs_sets(motifs, required_simulations, output_file = simulated_sets_path)
        simulated_sets = gen.read_many_fields(simulated_sets_path, "|")

        with open(stop_counts_path, "w") as output:
            output.write('id,stop_count\n')
            output.write('real,{0}\n'.format(real_count))
            for set_id, simulated_set in enumerate(simulated_sets, start=1):
                simulated_count = se.get_stop_codon_count(simulated_set)
                output.write('{0},{1}\n'.format(set_id, simulated_count))
Пример #6
0
def check_exon_files(input_bed1, input_bed2):
    """
    Do a sanity check to make sure there are no coding exons in the
    non coding exons file and vice versa.

    The check compares the fourth column of the two bed files (used here
    as the transcript identifier) and fails if any value occurs in both.

    Args:
        input_bed1 (str): path to the first bed file
        input_bed2 (str): path to the second bed file

    Returns:
        bool: True when the two files share no fourth-column value.

    Raises:
        Exception: if at least one fourth-column value is present in both
            files. The message reports how many are shared and lists a
            few of them.
    """

    bed_lines1 = gen.read_many_fields(input_bed1, "\t")
    bed_lines2 = gen.read_many_fields(input_bed2, "\t")
    transcripts1 = {line[3] for line in bed_lines1}
    transcripts2 = {line[3] for line in bed_lines2}
    # get any overlap
    overlap = transcripts1 & transcripts2
    if overlap:
        message = "Something's gone wrong. Coding exons and non coding exons are present in both files..."
        print(message)
        # carry the details in the exception so the traceback is informative
        raise Exception("{0} {1} shared entries, e.g. {2}".format(
            message, len(overlap), ", ".join(sorted(overlap)[:5])))
    return True
Пример #7
0
def fasta_from_intervals(bed_file, fasta_file, genome_fasta, force_strand = True, names = False):
    """
    Create a fasta file holding the sequences of the intervals in a bed
    file by calling `bedtools getfasta`, then rewrite the fasta with all
    sequences in upper case.
    Credit: Rosina Savisaar

    Args:
        bed_file (str): the bed file path to create fasta from
        fasta_file (str): the output fasta file path
        genome_fasta (str): the file path to the genome fasta file
        force_strand (bool): if True, pass "-s" to bedtools getfasta
        names (bool): if True, pass "-name" to bedtools getfasta so the
            fasta names come from the 'name' field of the bed file; if
            False, the record names are generated from the coordinates
    """

    # an existing index that lacks any chromosome of the bed file is removed
    index_path = genome_fasta + '.fai'
    if os.path.exists(index_path):
        bed_chroms = {row[0] for row in gen.read_many_fields(bed_file, "\t")}
        index_chroms = {row[0] for row in gen.read_many_fields(index_path, "\t")}
        if not bed_chroms <= index_chroms:
            gen.remove_file(index_path)

    command = ["bedtools", "getfasta"]
    if force_strand:
        command.append("-s")
    command.extend(["-fi", genome_fasta, "-bed", bed_file, "-fo", fasta_file])
    if names:
        command.append("-name")
    gen.run_process(command)

    # rewrite the output in place with upper-cased sequences
    record_names, sequences = gen.read_fasta(fasta_file)
    upper_sequences = [sequence.upper() for sequence in sequences]
    gen.write_to_fasta(record_names, upper_sequences, fasta_file)
Пример #8
0
 def test_clean_alleles(self):
     """clean_alleles output for input.vcf must equal the expected.vcf rows."""
     input_vcf = "test_data/snp_ops/test_clean_alleles/input.vcf"
     expected_path = "test_data/snp_ops/test_clean_alleles/expected.vcf"
     observed_path = "test_data/snp_ops/test_clean_alleles/observed.vcf"
     # remove output left by an earlier run, as the sibling tests do, so a
     # stale file cannot make the comparison pass
     gen.remove_file(observed_path)
     clean_alleles(input_vcf, observed_path)
     expected = gen.read_many_fields(expected_path, "\t")
     observed = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(observed, expected)
Пример #9
0
def extract_second_seqs(input_bed, input_file, genome_fasta, output_dir):
    """
    Extract the second set of sequences.

    Steps:
        1. write the lncRNA ids found in input_file to
           {output_dir}/lncrna_ids.txt
        2. keep only the bed entries whose fourth column is one of those ids
        3. convert the kept entries to an exon bed and fetch exon sequences
        4. join the exons of every multi exon transcript, keeping joined
           sequences without "N" that are at least 200 long
        5. run the family filter (blast) on the resulting transcripts

    Args:
        input_bed (str): path to the bed file; the intermediate files are
            written next to it using its path as a prefix
        input_file (str): file passed to extract_lncrna_only
        genome_fasta (str): path to the genome fasta file
        output_dir (str): directory for the id file and the blast outputs
    """
    # get a set of ids that correspond only to lincrna entries
    id_file = "{0}/lncrna_ids.txt".format(output_dir)
    extract_lncrna_only(input_file, id_file)

    # now keep only the bed entries that are in the id list
    filtered_bed = "{0}.filtered".format(input_bed)
    # read_many_fields returns one list of fields per line, so the fields are
    # flattened into a set; testing a string against the list of rows never matches.
    # NOTE(review): assumes every field of the id file is an id - confirm
    # against what extract_lncrna_only writes
    ids = {
        field
        for row in gen.read_many_fields(id_file, "\t")
        for field in row
    }
    bed_entries = gen.read_many_fields(input_bed, "\t")
    with open(filtered_bed, "w") as outfile:
        for entry in bed_entries:
            if entry[3] in ids:
                outfile.write("{0}\n".format("\t".join(entry)))

    # now write the bed to an exon bed
    exons_bed = "{0}.exons.bed".format(input_bed)
    fo.entries_to_bed(filtered_bed, exons_bed, hg38=True)
    # now get the exon sequences
    exons_fasta = "{0}.exons.fasta".format(input_bed)
    fo.fasta_from_intervals(exons_bed,
                            exons_fasta,
                            genome_fasta,
                            force_strand=True,
                            names=True)

    # now generate the full transcript for multi exon transcripts
    transcripts_fasta = "{0}.multi_exon_transcripts.fasta".format(input_bed)
    names, seqs = gen.read_fasta(exons_fasta)
    # transcript id -> {exon number: exon sequence}
    seq_list = collections.defaultdict(dict)
    for i, name in enumerate(names):
        # the part of the name before "(" is "<transcript id>.<exon number>"
        name_parts = name.split("(")[0].split(".")
        transcript_id = ".".join(name_parts[:-1])
        exon = int(name_parts[-1])
        seq_list[transcript_id][exon] = seqs[i]
    with open(transcripts_fasta, "w") as outfile:
        for transcript_id in sorted(seq_list):
            exons = seq_list[transcript_id]
            if len(exons) > 1:
                # join the exons in ascending exon number
                seq = "".join(exons[exon] for exon in sorted(exons))
                if "N" not in seq and len(seq) >= 200:
                    # write the id with ":" instead of "."; the original
                    # note says the "." form breaks sorting later on
                    outfile.write(">{0}\n{1}\n".format(
                        transcript_id.replace(".", ":"), seq))

    # blast to get paralogous families
    blast_db_path = "{0}/bast_db".format(output_dir)
    output_blast_file = "{0}/blast_output.csv".format(output_dir)
    families_file = "{0}/families.txt".format(output_dir)
    gen.create_output_directories(blast_db_path)
    cons.filter_families(transcripts_fasta,
                         output_blast_file,
                         families_file,
                         database_path=blast_db_path,
                         clean_run=True)
Пример #10
0
 def test_sort_bed(self):
     """sort_bed output for the unsorted fixture must equal the expected rows."""
     unsorted_bed = "test_data/bam_ops/test_sort_bed/test_intersect_bed_A_file_unsorted.bed"
     expected_path = "test_data/bam_ops/test_sort_bed/expected_test_intersect_bed_A_file.bed"
     observed_path = "test_data/bam_ops/test_sort_bed/observed_test_sort_bed.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     sort_bed(unsorted_bed, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #11
0
 def test_compare_PSI_haplotypes(self):
     """compare_PSI_haplotypes output must equal the expected.txt rows."""
     snps_bed = "test_data/bam_ops/test_compare_PSI_haplotypes/SNPs.bed"
     bam_dir = "test_data/bam_ops/test_compare_PSI_haplotypes/bam_folder"
     expected_rows = gen.read_many_fields("test_data/bam_ops/test_compare_PSI_haplotypes/expected.txt", "\t")
     observed_path = "test_data/bam_ops/test_compare_PSI_haplotypes/observed.txt"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     compare_PSI_haplotypes(snps_bed, bam_dir, observed_path, 3)
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #12
0
 def test_get_snp_relative_cds_position_plus_strand_split(self):
     """get_snp_relative_cds_position output must equal the expected bed rows.

     The function receives the already parsed relative exon position rows,
     not a file path.
     """
     relative_exon_positions = gen.read_many_fields("test_data/snp_ops/test_get_snp_relative_cds_position_plus_strand_split/test_snp_relative_exon_position.bed", "\t")
     full_bed = "test_data/snp_ops/test_get_snp_relative_cds_position_plus_strand_split/full_bed.bed"
     expected_rows = gen.read_many_fields("test_data/snp_ops/test_get_snp_relative_cds_position_plus_strand_split/expected_test_snp_relative_cds_position.bed", "\t")
     observed_path = "test_data/snp_ops/test_get_snp_relative_cds_position_plus_strand_split/observed_test_snp_relative_cds_position.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     get_snp_relative_cds_position(relative_exon_positions, observed_path, full_bed)
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(observed_rows, expected_rows)
Пример #13
0
 def test_tabix(self):
     """tabix output must hold the expected rows; row order is ignored."""
     regions_bed = "test_data/snp_ops/test_tabix/test_tabix_bed.txt"
     expected_rows = gen.read_many_fields("test_data/snp_ops/test_tabix/expected_test_tabix.txt", "\t")
     observed_path = "test_data/snp_ops/observed_test_tabix.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     sites_vcf = "../source_data/ALL.wgs.phase3_shapeit2_mvncall_integrated_v5b.20130502.sites.gz"
     tabix(regions_bed, observed_path, sites_vcf)
     observed_rows = gen.read_many_fields(observed_path, "\t")
     # both sides are sorted so only the content is compared
     self.assertEqual(sorted(observed_rows), sorted(expected_rows))
Пример #14
0
 def test_get_snp_type(self):
     """get_snp_type must return the expected triple for every fixture snp.

     The snp at row i is paired with the first field of row i of the cds
     fixture.
     """
     cds_rows = gen.read_many_fields("test_data/snp_ops/test_get_snp_type/test_cdss.bed", "\t")
     snp_rows = gen.read_many_fields("test_data/snp_ops/test_get_snp_type/test_snp_cds_info.bed", "\t")
     expected = gen.read_many_fields("test_data/snp_ops/test_get_snp_type/expected_snp_types.bed", "\t")
     observed = []
     for index, snp_row in enumerate(snp_rows):
         cds_sequence = cds_rows[index][0]
         cds_codon, snp_codon, mutation_type = get_snp_type(cds_sequence, snp_row)
         observed.append([cds_codon, snp_codon, mutation_type])
     self.assertEqual(observed, expected)
Пример #15
0
 def test_group_flags(self):
     """group_flags output (flag_start 3) must equal the expected bed rows."""
     input_bed = "test_data/bam_ops/test_group_flags/test_tabix.bed"
     observed_path = "test_data/bam_ops/test_group_flags/observed_test_group_flags.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     first_flag_column = 3
     group_flags(input_bed, observed_path, first_flag_column)
     expected_rows = gen.read_many_fields("test_data/bam_ops/test_group_flags/expected_test_group_flags.bed", "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #16
0
 def test_filter_by_snp_type(self):
     """filter_by_snp_type with type "non" must produce the expected bed rows."""
     snps_path = "test_data/snp_ops/test_filter_by_snp_type/input_snps.bed"
     expected_path = "test_data/snp_ops/test_filter_by_snp_type/expected_snps.bed"
     observed_path = "test_data/snp_ops/test_filter_by_snp_type/observed_snps.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     filter_by_snp_type(snps_path, observed_path, "non")
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(observed_rows, expected_rows)
Пример #17
0
 def test_remove_overlaps2(self):
     """remove_overlaps output for in.bed must equal the expected bed rows."""
     input_bed = "test_data/bed_ops/test_remove_overlaps2/in.bed"
     expected_path = "test_data/bed_ops/test_remove_overlaps2/expected.bed"
     observed_path = "test_data/bed_ops/test_remove_overlaps2/observed.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     remove_overlaps(input_bed, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #18
0
 def test_merge_and_header(self):
     """merge_and_header output for file1 and file2 must equal the expected rows."""
     first_path = "test_data/snp_ops/test_merge_and_header/file1.txt"
     second_path = "test_data/snp_ops/test_merge_and_header/file2.txt"
     expected_path = "test_data/snp_ops/test_merge_and_header/expected.txt"
     observed_path = "test_data/snp_ops/test_merge_and_header/observed.txt"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     merge_and_header(first_path, second_path, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #19
0
 def test_check_coding(self):
     """check_coding output for the exon and CDS fixtures must equal the expected rows."""
     exons_path = "test_data/bed_ops/test_check_coding/exons.bed"
     cds_path = "test_data/bed_ops/test_check_coding/CDSs.bed"
     expected_path = "test_data/bed_ops/test_check_coding/expected_check_coding.bed"
     observed_path = "test_data/bed_ops/test_check_coding/observed_check_coding.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     check_coding(exons_path, cds_path, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #20
0
 def test_intersect_bed_intersect_bedops(self):
     """intersect_bed with use_bedops and intersect set must give the expected rows."""
     bed_a = "test_data/bam_ops/test_intersect_bed_intersect_bedops/test_intersect_bed_A_file.bed"
     bed_b = "test_data/bam_ops/test_intersect_bed_intersect_bedops/test_intersect_bed_B_file.bed"
     expected_path = "test_data/bam_ops/test_intersect_bed_intersect_bedops/expected_test_intersect_bed_intersect_bedops.bed"
     observed_path = "test_data/bam_ops/test_intersect_bed_intersect_bedops/observed_test_intersect_bed_intersect_bedops.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     intersect_bed(bed_a, bed_b, output_file = observed_path, no_dups = False, use_bedops = True, intersect = True)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #21
0
 def test_get_dinucleotides_contact(self):
     """se.get_dinucleotides with concat_motifs=True must return the expected list.

     Only the first field of every fixture row is used.
     """
     motif_rows = gen.read_many_fields(
         "test_data/test_get_dinucleotides_concat/motif_set.txt", ",")
     motifs = [row[0] for row in motif_rows]
     expected_rows = gen.read_many_fields(
         "test_data/test_get_dinucleotides_concat/expected_dinucleotides.txt",
         ",")
     expected = [row[0] for row in expected_rows]
     observed = se.get_dinucleotides(motifs, concat_motifs=True)
     self.assertEqual(expected, observed)
Пример #22
0
 def test_get_dinucleotides_reg(self):
     """se.get_dinucleotides with default arguments must return the expected list.

     Only the first field of every fixture row is used.
     """
     motif_rows = gen.read_many_fields(
         "test_data/test_get_dinucleotides_reg/motif_set.txt", ",")
     motifs = [row[0] for row in motif_rows]
     expected_rows = gen.read_many_fields(
         "test_data/test_get_dinucleotides_reg/expected_dinucleotides.txt",
         ",")
     expected = [row[0] for row in expected_rows]
     observed = se.get_dinucleotides(motifs)
     self.assertEqual(expected, observed)
Пример #23
0
 def test_intersect_bed_force_strand_hit_count(self):
     """intersect_bed with force_strand and hit_count set must give the expected rows."""
     bed_a = "test_data/bam_ops/test_intersect_bed_force_strand_hit_count/test_intersect_bed_A_file.bed"
     bed_b = "test_data/bam_ops/test_intersect_bed_force_strand_hit_count/test_intersect_bed_B_file.bed"
     expected_path = "test_data/bam_ops/test_intersect_bed_force_strand_hit_count/expected_test_intersect_bed_force_strand_hit_count.bed"
     observed_path = "test_data/bam_ops/test_intersect_bed_force_strand_hit_count/observed_test_intersect_bed_force_strand_hit_count.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     intersect_bed(bed_a, bed_b, output_file = observed_path, no_dups = False, force_strand = True, hit_count = True)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #24
0
 def test_get_descriptions(self):
     """get_descriptions output for four transcript names must equal the expected rows."""
     gtf_path = "test_data/bed_ops/test_get_descriptions/descriptions.gtf"
     transcript_names = ["ENST100", "ENST7", "ENST0003", "ENST5"]
     expected_path = "test_data/bed_ops/test_get_descriptions/expected_get_descriptions.txt"
     observed_path = "test_data/bed_ops/test_get_descriptions/observed_get_descriptions.txt"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     get_descriptions(transcript_names, gtf_path, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #25
0
 def test_extract_exon_junctions_window(self):
     """extract_exon_junctions with a third argument of 30 must give the expected rows."""
     exons_bed = "test_data/bed_ops/test_extract_exon_junctions_window/test_extract_exon_junctions.bed"
     observed_path = "test_data/bed_ops/test_extract_exon_junctions_window/observed_test_extract_exon_window_junctions.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     extract_exon_junctions(exons_bed, observed_path, 30)
     expected_rows = gen.read_many_fields(
         "test_data/bed_ops/test_extract_exon_junctions_window/expected_test_extract_exon_window_junctions.bed",
         "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #26
0
 def test_filter_bed_from_fasta(self):
     """filter_bed_from_fasta output must equal the expected bed rows."""
     bed_path = "test_data/bed_ops/test_filter_bed_from_fasta/test_filter_bed_from_fasta.bed"
     fasta_path = "test_data/bed_ops/test_filter_bed_from_fasta/test_filter_bed_from_fasta.fasta"
     observed_path = "test_data/bed_ops/test_filter_bed_from_fasta/observed_test_filter_bed_from_fasta.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     expected_path = "test_data/bed_ops/test_filter_bed_from_fasta/expected_test_filter_bed_from_fasta.bed"
     filter_bed_from_fasta(bed_path, fasta_path, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #27
0
 def test_extract_exons(self):
     """extract_exons output for the gtf fixture must equal the expected bed rows."""
     gtf_path = "test_data/bed_ops/test_extract_exons/test_extract_exons.gtf"
     observed_path = "test_data/bed_ops/test_extract_exons/observed_test_extract_exons.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     extract_exons(gtf_path, observed_path)
     expected_rows = gen.read_many_fields(
         "test_data/bed_ops/test_extract_exons/expected_test_extract_exons.bed",
         "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #28
0
 def test_extract_features_cdss_stops(self):
     """extract_features for 'CDS' and 'stop_codon' must give the expected bed rows."""
     gtf_path = "test_data/bed_ops/test_extract_features_cdss_stops/test_extract_features.gtf"
     observed_path = "test_data/bed_ops/test_extract_features_cdss_stops/observed_test_extract_features_cdss_stops.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     wanted_features = ['CDS', 'stop_codon']
     extract_features(gtf_path, observed_path, wanted_features)
     expected_rows = gen.read_many_fields(
         "test_data/bed_ops/test_extract_features_cdss_stops/expected_test_extract_features_cdss_stops.bed",
         "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(observed_rows, expected_rows)
Пример #29
0
 def test_intersect_bed_overlap(self):
     """intersect_bed with overlap = 0.5 must give the expected bed rows."""
     bed_a = "test_data/bam_ops/test_intersect_bed_overlap/test_intersect_bed_A_file.bed"
     bed_b = "test_data/bam_ops/test_intersect_bed_overlap/test_intersect_bed_B_file.bed"
     expected_path = "test_data/bam_ops/test_intersect_bed_overlap/expected_test_intersect_bed_overlap.bed"
     observed_path = "test_data/bam_ops/test_intersect_bed_overlap/observed_test_intersect_bed_overlap.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     intersect_bed(bed_a, bed_b, output_file = observed_path, no_dups = False, overlap = 0.5)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)
Пример #30
0
 def test_filter_exon_junctions(self):
     """filter_exon_junctions output must equal the expected bed rows."""
     junctions_path = "test_data/bed_ops/test_filter_exon_junctions/exon_junctions.bed"
     exons_path = "test_data/bed_ops/test_filter_exon_junctions/exons.bed"
     expected_path = "test_data/bed_ops/test_filter_exon_junctions/expected_filter_exon_junctions.bed"
     observed_path = "test_data/bed_ops/test_filter_exon_junctions/observed_filter_exon_junctions.bed"
     # remove output left by a previous run
     gen.remove_file(observed_path)
     filter_exon_junctions(junctions_path, exons_path, observed_path)
     expected_rows = gen.read_many_fields(expected_path, "\t")
     observed_rows = gen.read_many_fields(observed_path, "\t")
     self.assertEqual(expected_rows, observed_rows)