def test_break2_strand_ns(self, tmp_path):
    """Check how an unspecified break2 strand is handled for a stranded pair.

    The input row is flagged as stranded but gives ``STRAND.NS`` for break2.
    Reading it without strand/orientation expansion must raise
    ``NotSpecifiedError``; reading it with both expansions enabled must give
    exactly one pair whose break2 strand is ``STRAND.POS``.

    Args:
        tmp_path: directory in which the input file is written
    """
    input_file = tmp_path / "inputs.tsv"
    input_file.write_text(
        mock_file_content(
            {
                COLUMNS.break1_chromosome: '1',
                COLUMNS.break1_position_start: 1,
                COLUMNS.break1_position_end: 1,
                COLUMNS.break1_strand: STRAND.POS,
                COLUMNS.break1_orientation: ORIENT.LEFT,
                COLUMNS.break2_chromosome: '1',
                COLUMNS.break2_position_start: 10,
                COLUMNS.break2_position_end: 10,
                COLUMNS.break2_strand: STRAND.NS,
                COLUMNS.break2_orientation: ORIENT.RIGHT,
                COLUMNS.stranded: True,
                COLUMNS.opposing_strands: False,
            }
        )
    )

    # keep only the call that is expected to fail inside the raises block
    with pytest.raises(NotSpecifiedError):
        read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=False)

    bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=True)
    assert len(bpps) == 1
    assert bpps[0].break2.strand == STRAND.POS
def test_unstranded_with_strand_calls(self):
    """Check strand handling for rows that carry explicit strand calls.

    With ``stranded`` set to False the strands read back must be
    ``STRAND.NS`` on both breakpoints; with ``stranded`` set to True the
    POS/NEG calls from the row must be kept. Each case is read with
    orientation expansion off and then on.
    """

    def make_row(stranded):
        # the two scenarios differ only in the ``stranded`` flag
        return {
            COLUMNS.break1_chromosome: '1',
            COLUMNS.break1_position_start: 1,
            COLUMNS.break1_position_end: 1,
            COLUMNS.break1_strand: STRAND.POS,
            COLUMNS.break1_orientation: ORIENT.RIGHT,
            COLUMNS.break2_chromosome: '1',
            COLUMNS.break2_position_start: 10,
            COLUMNS.break2_position_end: 10,
            COLUMNS.break2_strand: STRAND.NEG,
            COLUMNS.break2_orientation: ORIENT.RIGHT,
            COLUMNS.stranded: stranded,
            COLUMNS.opposing_strands: True,
        }

    unstranded_handle = self.build_filehandle(make_row(False))
    for expand_orient in (False, True):
        pairs = read_bpp_from_input_file(
            unstranded_handle, expand_strand=False, expand_orient=expand_orient
        )
        self.assertEqual(1, len(pairs))
        self.assertEqual(STRAND.NS, pairs[0].break1.strand)
        self.assertEqual(STRAND.NS, pairs[0].break2.strand)

    stranded_handle = self.build_filehandle(make_row(True))
    for expand_orient in (False, True):
        pairs = read_bpp_from_input_file(
            stranded_handle, expand_strand=True, expand_orient=expand_orient
        )
        self.assertEqual(1, len(pairs))
        self.assertEqual(STRAND.POS, pairs[0].break1.strand)
        self.assertEqual(STRAND.NEG, pairs[0].break2.strand)
def test_unstranded_with_strand_calls(self, tmp_path):
    """Check strand handling for rows that carry explicit strand calls.

    An unstranded row read without expansion must give ``STRAND.NS`` on both
    breakpoints; the same row flagged as stranded and read with strand
    expansion must keep its POS/NEG calls.

    Args:
        tmp_path: directory in which the two input files are written
    """

    def make_row(stranded):
        # the two scenarios differ only in the ``stranded`` flag
        return {
            COLUMNS.break1_chromosome: '1',
            COLUMNS.break1_position_start: 1,
            COLUMNS.break1_position_end: 1,
            COLUMNS.break1_strand: STRAND.POS,
            COLUMNS.break1_orientation: ORIENT.RIGHT,
            COLUMNS.break2_chromosome: '1',
            COLUMNS.break2_position_start: 10,
            COLUMNS.break2_position_end: 10,
            COLUMNS.break2_strand: STRAND.NEG,
            COLUMNS.break2_orientation: ORIENT.RIGHT,
            COLUMNS.stranded: stranded,
            COLUMNS.opposing_strands: True,
        }

    unstranded_file = tmp_path / "inputs.tsv"
    unstranded_file.write_text(mock_file_content(make_row(False)))
    pairs = read_bpp_from_input_file(unstranded_file, expand_strand=False, expand_orient=False)
    assert len(pairs) == 1
    first = pairs[0]
    assert first.break1.strand == STRAND.NS
    assert first.break2.strand == STRAND.NS

    stranded_file = tmp_path / "inputs2.tsv"
    stranded_file.write_text(mock_file_content(make_row(True)))
    pairs = read_bpp_from_input_file(stranded_file, expand_strand=True, expand_orient=False)
    assert len(pairs) == 1
    first = pairs[0]
    assert first.break1.strand == STRAND.POS
    assert first.break2.strand == STRAND.NEG
def test_clustering_events(self):
    """Merge the genome events of the clustered-events file into one cluster.

    The input file contains 2 genome-protocol events that should be
    clustered together and produce a single valid breakpoint pair.
    """

    def by_chromosomes(pair):
        return (pair.break1.chr, pair.break2.chr)

    def by_first_input(item):
        # item is a (cluster, input pairs) entry; order on its first input pair
        return by_chromosomes(item[1][0])

    genome_pairs = []
    for pair in sorted(read_bpp_from_input_file(CLUSTERED_EVENTS), key=by_chromosomes):
        if pair.data[COLUMNS.protocol] != PROTOCOL.GENOME:
            continue
        genome_pairs.append(pair)
        print(pair)
    self.assertEqual(2, len(genome_pairs))

    clusters = merge_breakpoint_pairs(genome_pairs, 200, 25)
    self.assertEqual(1, len(clusters))

    for cluster, input_pairs in sorted(clusters.items(), key=by_first_input):
        print(cluster)
        for source_pair in input_pairs:
            print('\t', source_pair)
        print(cluster.flatten())
        # expected: BPP(Breakpoint(15:67333604L), Breakpoint(15:67333606R), opposing=False)
        first_break = cluster.break1
        second_break = cluster.break2
        self.assertEqual('L', first_break.orient)
        self.assertEqual('R', second_break.orient)
        self.assertEqual('15', first_break.chr)
        self.assertEqual('15', second_break.chr)
        self.assertEqual(67333604, first_break.start)
        self.assertEqual(67333606, second_break.start)
        self.assertEqual(67333604, first_break.end)
        self.assertEqual(67333606, second_break.end)
def test_break2_strand_ns(self):
    """Check how an unspecified break2 strand is handled for a stranded pair.

    The input row is flagged as stranded but gives ``STRAND.NS`` for break2.
    Reading it without strand/orientation expansion must raise
    ``NotSpecifiedError``; reading it with both expansions enabled must give
    exactly one pair whose break2 strand is ``STRAND.POS``.
    """
    fh = self.build_filehandle(
        {
            COLUMNS.break1_chromosome: '1',
            COLUMNS.break1_position_start: 1,
            COLUMNS.break1_position_end: 1,
            COLUMNS.break1_strand: STRAND.POS,
            COLUMNS.break1_orientation: ORIENT.LEFT,
            COLUMNS.break2_chromosome: '1',
            COLUMNS.break2_position_start: 10,
            COLUMNS.break2_position_end: 10,
            COLUMNS.break2_strand: STRAND.NS,
            COLUMNS.break2_orientation: ORIENT.RIGHT,
            COLUMNS.stranded: True,
            COLUMNS.opposing_strands: False,
        }
    )

    # keep only the call that is expected to fail inside the assertRaises block
    with self.assertRaises(NotSpecifiedError):
        read_bpp_from_input_file(fh, expand_strand=False, expand_orient=False)

    bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=True)
    self.assertEqual(1, len(bpps))
    self.assertEqual(STRAND.POS, bpps[0].break2.strand)
def run_main(self, inputfile, file_type, strand_specific=False):
    """Run the convert subcommand on ``inputfile`` and load what it wrote.

    Args:
        inputfile: path handed to the ``-n`` option
        file_type: value handed to ``--file_type``; also names the output file
        strand_specific: value handed to ``--strand_specific``

    Returns:
        dict mapping each tracking id to the list of breakpoint pairs read
        back from the converted output file
    """
    outputfile = os.path.join(TEMP_OUTPUT, file_type + '.tab')
    args = [
        'mavis',
        SUBCOMMAND.CONVERT,
        '-o',
        outputfile,
        '-n',
        inputfile,
        '--file_type',
        file_type,
        '--strand_specific',
        # sys.argv is a list of strings, so pass the flag as text the way a
        # real command line would deliver it instead of as a raw bool
        str(strand_specific),
    ]
    with patch.object(sys, 'argv', args):
        self.assertEqual(0, main())
        print('output', outputfile)
        self.assertTrue(unique_exists(outputfile))

    result = {}
    for pair in read_bpp_from_input_file(outputfile):
        result.setdefault(pair.tracking_id, []).append(pair)
    return result
def test_pairing(self):
    """Run the pairing subcommand end-to-end and check the paired output.

    The command must exit with 0, write ``mavis_paired_A36971_A36973.tab``
    into the temporary output directory, and that file must contain 6
    breakpoint pairs when read without expansion.
    """
    pairing_input = os.path.join(DATA_PREFIX, 'pairing_annotations.tab')
    reference_annotations = os.path.join(
        DATA_PREFIX, 'pairing_reference_annotations_file.tab'
    )
    argv = ['mavis', SUBCOMMAND.PAIR]
    argv += ['-n', pairing_input]
    argv += ['-o', TEMP_OUTPUT]
    argv += ['--annotations', reference_annotations]

    with patch.object(sys, 'argv', argv):
        exit_code = main()
        self.assertEqual(0, exit_code)

        # the paired output file has to be present
        paired_file = os.path.join(TEMP_OUTPUT, 'mavis_paired_A36971_A36973.tab')
        self.assertTrue(os.path.exists(paired_file))

        # and it has to hold the expected number of pairings
        pairs = read_bpp_from_input_file(paired_file, expand_strand=False, expand_orient=False)
        self.assertEqual(6, len(pairs))
def test_mocked_events(self):
    """None of the genome events in the mocked file should cluster together.

    The mocked file is expected to yield 28 genome-protocol events, each of
    which must end up alone in its own cluster. If the mock file changes,
    this test may need to be updated.
    """

    def by_chromosomes(pair):
        return (pair.break1.chr, pair.break2.chr)

    def by_first_input(item):
        # item is a (cluster, input pairs) entry; order on its first input pair
        return by_chromosomes(item[1][0])

    genome_pairs = []
    for pair in sorted(read_bpp_from_input_file(FULL_BASE_EVENTS), key=by_chromosomes):
        if pair.data[COLUMNS.protocol] != PROTOCOL.GENOME:
            continue
        genome_pairs.append(pair)
        print(pair)
    self.assertEqual(28, len(genome_pairs))

    clusters = merge_breakpoint_pairs(genome_pairs, 10, 10)

    for cluster, input_pairs in sorted(clusters.items(), key=by_first_input):
        print(cluster)
        for source_pair in input_pairs:
            print('\t', source_pair)
        # every cluster must come from exactly one input pair
        self.assertEqual(1, len(input_pairs))

    self.assertEqual(len(genome_pairs), len(clusters))
def test_break1_orient_ns(self):
    """Check that an unspecified break1 orientation is read back as LEFT.

    The row gives ``ORIENT.NS`` for break1; reading it with orientation
    expansion enabled must give exactly one pair whose break1 orientation
    is ``ORIENT.LEFT``.
    """
    row = {
        COLUMNS.break1_chromosome: '1',
        COLUMNS.break1_position_start: 1,
        COLUMNS.break1_position_end: 1,
        COLUMNS.break1_strand: STRAND.POS,
        COLUMNS.break1_orientation: ORIENT.NS,
        COLUMNS.break2_chromosome: '1',
        COLUMNS.break2_position_start: 10,
        COLUMNS.break2_position_end: 10,
        COLUMNS.break2_strand: STRAND.POS,
        COLUMNS.break2_orientation: ORIENT.RIGHT,
        COLUMNS.stranded: False,
        COLUMNS.opposing_strands: False,
    }
    handle = self.build_filehandle(row)

    pairs = read_bpp_from_input_file(handle, expand_strand=False, expand_orient=True)

    self.assertEqual(1, len(pairs))
    first_pair = pairs[0]
    self.assertEqual(ORIENT.LEFT, first_pair.break1.orient)
def run_main(self, inputfile, file_type, strand_specific=False):
    """Run the convert subcommand on ``inputfile`` and load what it wrote.

    Args:
        inputfile: path handed to the ``-n`` option
        file_type: value handed to ``--file_type``; also names the output file
        strand_specific: value handed to ``--strand_specific``

    Returns:
        dict mapping each ``tracking_id`` data value to the list of
        breakpoint pairs read back from the converted output file
    """
    outputfile = os.path.join(TEMP_OUTPUT, file_type + '.tab')
    args = [
        'mavis',
        SUBCOMMAND.CONVERT,
        '-o',
        outputfile,
        '-n',
        inputfile,
        '--file_type',
        file_type,
        '--strand_specific',
        # sys.argv is a list of strings, so pass the flag as text the way a
        # real command line would deliver it instead of as a raw bool
        str(strand_specific),
    ]
    with patch.object(sys, 'argv', args):
        main()
        print('output', outputfile)
        assert glob_exists(outputfile, n=1)

    result = {}
    for pair in read_bpp_from_input_file(outputfile):
        result.setdefault(pair.data['tracking_id'], []).append(pair)
    return result