def test_write_si_pos_from_exons(self):
    """Compare the BED file written by write_si_pos_from_exons with the expected fixture."""
    # (start, end, frame) of the three CDS records; both transcripts share them.
    cds_records = [
        (1143371, 1143410, '0'),
        (1145444, 1146472, '2'),
        (1146539, 1146825, '2'),
    ]
    # transcript ID -> (strand, attribute column); the two entries differ only
    # in strand and transcript_id.
    transcripts = {
        "FBtr0077958": (
            '+',
            'gene_id "FBgn0031307"; gene_symbol "MFS3"; transcript_id "FBtr0077958"; transcript_symbol "MFS3-RA";',
        ),
        "FBtr0077959": (
            '-',
            'gene_id "FBgn0031307"; gene_symbol "MFS3"; transcript_id "FBtr0077959"; transcript_symbol "MFS3-RA";',
        ),
    }
    exons = {
        transcript_id: [
            ['2L', 'FlyBase', 'CDS', start, end, '.', strand, frame, attributes]
            for start, end, frame in cds_records
        ]
        for transcript_id, (strand, attributes) in transcripts.items()
    }
    observed_path = "tests/write_si_pos_from_exons_observed.bed"
    expected_path = "tests/write_si_pos_from_exons_expected.bed"
    write_si_pos_from_exons(exons, observed_path, add_chr=False)
    observed_text = rw.read_as_string(observed_path)
    expected_text = rw.read_as_string(expected_path)
    self.assertEqual(expected_text, observed_text)
def test_get_spliced_reads_exon_filters_exon_start_window(self):
    """Run get_spliced_reads with exon annotations and filter_start/filter_end set, then compare with fixtures."""
    reads_path = "tests/get_spliced_reads_exon_start_window_input_reads.bed"
    junctions_path = "tests/get_spliced_reads_input_junctions.bed"
    observed_spliced_path = "tests/get_spliced_reads_exon_filters_exon_start_window_observed_spliced.bed"
    observed_unspliced_path = "tests/get_spliced_reads_exon_filters_exon_start_window_observed_unspliced.bed"
    expected_spliced_path = "tests/get_spliced_reads_exon_filters_exon_start_window_expected_spliced.bed"
    expected_unspliced_path = "tests/get_spliced_reads_exon_start_window_expected_unspliced.bed"

    # Both transcripts use the same four exon intervals; the minus-strand
    # transcript lists them in descending coordinate order.
    intervals = [(2, 6), (11, 13), (17, 19), (22, 26)]
    exons = {
        "FBtr0078168": [
            ["chr2L", "havana", "exon", start, end, "FBtr0078168", ".", "+"]
            for start, end in intervals
        ],
        "FBtr0078169": [
            ["chr2L", "havana", "exon", start, end, "FBtr0078169", ".", "-"]
            for start, end in reversed(intervals)
        ],
    }

    get_spliced_reads(
        reads_path,
        junctions_path,
        observed_spliced_path,
        observed_unspliced_path,
        exons=exons,
        overhang=0,
        filter_start=1,
        filter_end=3,
    )

    # Spliced and unspliced outputs are concatenated so one assertion covers both.
    expected_text = rw.read_as_string(expected_spliced_path) + rw.read_as_string(expected_unspliced_path)
    observed_text = rw.read_as_string(observed_spliced_path) + rw.read_as_string(observed_unspliced_path)
    self.assertEqual(expected_text, observed_text)
def test_snr_bed(self):
    """Run snr_bed on the input fixture and compare its output with the expected fixture."""
    source_path = "tests/snr_bed_input.bed"
    result_path = "tests/snr_bed_observed.bed"
    snr_bed(source_path, result_path)
    expected_text = rw.read_as_string("tests/snr_bed_expected.bed")
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_snr_bed_fiveprime(self):
    """Run snr_bed with five_prime_most=True and compare its output with the expected fixture."""
    source_path = "tests/snr_bed_fiveprime_input.bed"
    result_path = "tests/snr_bed_fiveprime_observed.bed"
    snr_bed(source_path, result_path, five_prime_most=True)
    expected_text = rw.read_as_string("tests/snr_bed_fiveprime_expected.bed")
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_get_transcripts(self):
    """Run get_transcripts on the GTF fixture and compare the BED it writes with the expected fixture."""
    gtf_path = "tests/get_transcripts_input.gtf"
    result_path = "tests/get_transcripts_observed.bed"
    reference_path = "tests/get_transcripts_expected.bed"
    get_transcripts(gtf_path, result_path)
    expected_text = rw.read_as_string(reference_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_density_per_transcript(self):
    """Run density_per_transcript on the fixtures and compare the written table with the expected file."""
    exons_path = "tests/density_per_transcript_input_exons.gtf"
    polII_path = "tests/density_per_transcript_input_polII.bed"
    result_path = "tests/density_per_transcript_observed.txt"
    reference_path = "tests/density_per_transcript_expected.txt"
    density_per_transcript(exons_path, polII_path, result_path)
    expected_text = rw.read_as_string(reference_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_merge_bed(self):
    """Run merge_bed with a distance of 2 and compare its output with the expected fixture."""
    source_path = "tests/merge_bed_input.bed"
    result_path = "tests/merge_bed_output.bed"
    reference_path = "tests/merge_bed_expected.bed"
    merge_distance = 2
    merge_bed(source_path, result_path, merge_distance)
    expected_text = rw.read_as_string(reference_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_extend_intervals(self):
    """Run extend_intervals with shifts of 4 (left) and 5 (right) and compare with the expected fixture."""
    source_path = "tests/extend_intervals_input.bed"
    reference_path = "tests/extend_intervals_expected.bed"
    result_path = "tests/extend_intervals_observed.bed"
    upstream, downstream = 4, 5
    extend_intervals(source_path, result_path, upstream, downstream)
    expected_text = rw.read_as_string(reference_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_extend_intervals_three_prime(self):
    """Run extend_intervals with three_prime=True (shifts 4 and 2) and compare with the expected fixture."""
    source_path = "tests/extend_intervals_input.bed"
    reference_path = "tests/extend_intervals_three_prime_expected.bed"
    result_path = "tests/extend_intervals_three_prime_observed.bed"
    upstream, downstream = 4, 2
    extend_intervals(source_path, result_path, upstream, downstream, three_prime=True)
    expected_text = rw.read_as_string(reference_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def test_extract_3ss(self):
    """Run extract_3ss on a plus-strand and a minus-strand exon set and compare with the expected fixture."""
    # Attribute column shared by every exon of the respective gene.
    plus_attributes = 'gene_id "FBgn1"; gene_symbol "drl"; transcript_id "FBtr33"; transcript_symbol "drl-RA";'
    minus_attributes = 'gene_id "FBgn2"; gene_symbol "drl"; transcript_id "FBtr3"; transcript_symbol "drl-RA";'

    # Plus-strand exons are listed in ascending coordinate order, minus-strand
    # exons in descending order.
    plus_intervals = [(1, 4), (9, 10), (12, 14), (17, 18), (21, 23)]
    minus_intervals = [(22, 23), (18, 19), (13, 14), (4, 7)]

    exons = {
        "FBgn1": [
            ["2L", "FlyBase", "exon", start, end, ".", "+", ".", plus_attributes]
            for start, end in plus_intervals
        ],
        "FBgn2": [
            ["2L", "FlyBase", "exon", start, end, ".", "-", ".", minus_attributes]
            for start, end in minus_intervals
        ],
    }

    result_path = "tests/extract_3ss_observed.bed"
    expected_text = rw.read_as_string("tests/extract_3ss_expected.bed")
    extract_3ss(exons, result_path)
    observed_text = rw.read_as_string(result_path)
    self.assertEqual(expected_text, observed_text)
def extend_intervals(input_file, output_file, left_shift, right_shift, remove_chr=False, add_chr=False,
                     names_file=None, three_prime=False):
    """
    Given a BED file, make a new BED file with intervals that start
    _left_shift_ nt upstream of the interval starts in the original file and
    end _right_shift_ nt to the right. Note that for intervals on the
    negative strand, right and left will be reversed.

    Only the first six BED columns are written. Blank rows in the input are
    skipped. Intervals whose new start coordinate would be negative are
    dropped; the number dropped is printed at the end.

    :param input_file: input BED file (tab-delimited, strand in column 6)
    :param output_file: output BED file
    :param left_shift: distance between old and new interval start
    :param right_shift: distance between old interval start and new interval end
    :param remove_chr: if True, remove a leading "chr" from the chromosome name in the output
    :param add_chr: if True, prefix "chr" to the chromosome name in the output
    :param names_file: if specified, then reads will only be processed if
        the ID (column 4) is in the specified file (one ID per line); each
        listed ID is consumed once per occurrence in the file
    :param three_prime: if True, extend around interval ends instead
    :return: None
    """
    # Function-scope import so the block does not depend on the module's import list.
    from collections import Counter

    remove_counter = 0

    # A multiset of still-allowed IDs: O(1) lookup per row, while keeping the
    # "each listed name is used up once" behaviour of removing from a list.
    remaining_names = Counter()
    if names_file:
        remaining_names.update(rw.read_as_string(names_file).split("\n"))

    # Strand whose anchor coordinate is the interval start (column 2). For
    # three-prime mode the roles of the strands, and of the shifts, are swapped.
    start_anchored_strand = "+"
    if three_prime:
        start_anchored_strand = "-"
        left_shift, right_shift = right_shift, left_shift

    with open(input_file) as bed, open(output_file, "w") as out_bed:
        reader = csv.reader(bed, delimiter="\t")
        writer = csv.writer(out_bed, delimiter="\t")
        for line in reader:
            # csv yields [] for blank lines (e.g. a trailing newline); there is
            # nothing to extend, so skip instead of failing on the column lookups.
            if not line:
                continue

            if names_file:
                read_id = line[3]
                if remaining_names[read_id] <= 0:
                    continue
                remaining_names[read_id] -= 1

            # Keep at most the six standard BED columns.
            template = line[:6]

            if line[5] == start_anchored_strand:
                template[1] = int(line[1]) - left_shift
                template[2] = int(line[1]) + right_shift
            else:
                template[1] = int(line[2]) - right_shift
                template[2] = int(line[2]) + left_shift

            # Ignore cases where the read is so close to the start of the
            # chromosome that the new interval would start at a negative coordinate.
            if template[1] < 0:
                remove_counter += 1
                continue

            # str.lstrip("chr") would strip any leading run of the characters
            # 'c', 'h', 'r' (turning "chrrDNA" into "DNA"), so drop the exact
            # prefix only.
            if remove_chr and template[0].startswith("chr"):
                template[0] = template[0][len("chr"):]
            if add_chr:
                template[0] = "chr{0}".format(template[0])
            writer.writerow(template)

    print("Removed because would have exceeded the chromosome: {0}.".format(remove_counter))
def test_get_spliced_reads_no_filters(self):
    """Run get_spliced_reads with exons=False and overhang=0, then compare both outputs with fixtures."""
    reads_path = "tests/get_spliced_reads_input_reads.bed"
    junctions_path = "tests/get_spliced_reads_input_junctions.bed"
    observed_spliced_path = "tests/get_spliced_reads_observed_spliced.bed"
    observed_unspliced_path = "tests/get_spliced_reads_observed_unspliced.bed"
    expected_spliced_path = "tests/get_spliced_reads_expected_spliced.bed"
    expected_unspliced_path = "tests/get_spliced_reads_expected_unspliced.bed"

    get_spliced_reads(
        reads_path,
        junctions_path,
        observed_spliced_path,
        observed_unspliced_path,
        exons=False,
        overhang=0,
    )

    # Spliced and unspliced outputs are concatenated so one assertion covers both.
    expected_text = rw.read_as_string(expected_spliced_path) + rw.read_as_string(expected_unspliced_path)
    observed_text = rw.read_as_string(observed_spliced_path) + rw.read_as_string(observed_unspliced_path)
    self.assertEqual(expected_text, observed_text)