def test_compare_breaks(self):
    """Exercise compare_breaks with no truth set, two distances, and a locus window.

    The expected match tables are tied to the contents of the two BEDPE
    fixtures loaded below.
    """
    truth_path = lr_test.in_path('test_compare_breaks_true.bedpe')
    calls_path = lr_test.in_path('test_compare_breaks_pred.bedpe')

    # Predictions only (no truth file): all three results must be empty.
    pred_hits, true_hits, filtered = compare_breaks(calls_path)
    for result in (pred_hits, true_hits, filtered):
        assert len(result) == 0

    # max_dist = 0: a single truth/prediction pair is reported.
    pred_hits, true_hits, filtered = compare_breaks(
        calls_path, truth_path, max_dist=0)
    assert len(filtered) == 0
    assert len(true_hits) == 1
    assert true_hits['true_1'] == {'pred_3'}
    assert len(pred_hits) == 1
    assert pred_hits['pred_3'] == {'true_1'}

    # max_dist = 500: two truth events match three predictions in total.
    pred_hits, true_hits, filtered = compare_breaks(
        calls_path, truth_path, max_dist=500)
    assert len(filtered) == 0
    assert len(true_hits) == 2
    assert true_hits['true_1'] == {'pred_3', 'pred_4'}
    assert true_hits['true_2'] == {'pred_6'}
    assert len(pred_hits) == 3
    assert pred_hits['pred_6'] == {'true_2'}
    assert pred_hits['pred_3'] == {'true_1'}
    assert pred_hits['pred_4'] == {'true_1'}

    # Same distance plus window_loci: one entry is reported as filtered and
    # only the true_1 matches remain.
    pred_hits, true_hits, filtered = compare_breaks(
        calls_path, truth_path, max_dist=500,
        window_loci=[('chr4', [1000], [3000])])
    assert len(filtered) == 1
    assert len(true_hits) == 1
    assert true_hits['true_1'] == {'pred_3', 'pred_4'}
    assert len(pred_hits) == 2
    assert pred_hits['pred_3'] == {'true_1'}
    assert pred_hits['pred_4'] == {'true_1'}
def test_merge_breaks(self):
    """Check merge_breaks output files and returned frames for several settings.

    Covers two merge windows compared against golden BEDPE files, a
    max_nmates=0 run that must leave no data lines, and max_range runs
    whose surviving call names (and, for the chain fixture, info strings)
    are checked on the returned data frame.
    """
    in_path = lr_test.in_path('test_merge_breaks.bedpe')
    out_path = lr_test.out_path('test_merge_breaks_out.bedpe')

    # Golden-file comparisons for two merge windows.
    merge_breaks(in_path, out_path, merge_win=50)
    self.assert_files_same(
        out_path, lr_test.in_path('test_merge_breaks_win50.bedpe'), False)

    merge_breaks(in_path, out_path, merge_win=500)
    self.assert_files_same(
        out_path, lr_test.in_path('test_merge_breaks_win500.bedpe'), False)

    # With max_nmates=0 the output may contain only '#' header lines.
    merge_breaks(in_path, out_path, merge_win=500, max_nmates=0)
    with open(out_path, 'r') as f:
        records = [line for line in f if not line.startswith('#')]
    # assertEqual (rather than a bare assert) reports the offending lines
    # on failure and is not stripped when Python runs with -O.
    self.assertEqual(records, [])

    # max_range=1000: check which call names survive for each merge window.
    res_df = merge_breaks(in_path, out_path, merge_win=500, max_range=1000)
    self.assertEqual(set(res_df['name']), set(['F', 'B', 'D']))

    res_df = merge_breaks(in_path, out_path, merge_win=50, max_range=1000)
    self.assertEqual(set(res_df['name']), set(['F', 'B', 'D', 'A', 'C']))

    # Chain fixture: both the order of names and the info strings are pinned.
    res_df = merge_breaks(lr_test.in_path('test_merge_breaks_chain.bedpe'),
                          out_path, merge_win=1000, max_range=1000)
    self.assertEqual(list(res_df['name']), ['B', 'D'])
    self.assertEqual(list(res_df['info']),
                     ['NMATES1=2;NMATES2=1', 'NMATES1=2;NMATES2=1'])
def test_sort_and_merge(self):
    """Compare sort_and_merge output against golden BED files for two distances.

    Unlike a plain walk over the expected file, this also checks that the
    number of merged regions equals the number of expected regions, so
    extra trailing regions in the output are reported as a failure.
    """
    def read_regions(path):
        # Parse a whitespace-delimited BED file into (chrom, start, stop) tuples.
        parsed = []
        with open(path, 'r') as f:
            for line in f:
                fields = line.strip().split()
                parsed.append((fields[0], int(fields[1]), int(fields[2])))
        return parsed

    def check_merge(merge_dist, expected_name):
        # Merge the input regions and compare them, in order, with a golden file.
        merged = sort_and_merge(regions, merge_dist)
        expected = read_regions(lr_test.in_path(expected_name))
        assert len(merged) == len(expected), (
            'expected %d regions, got %d' % (len(expected), len(merged)))
        for idx, region in enumerate(expected):
            assert region == merged[idx], (
                'region %d: expected %r, got %r' % (idx, region, merged[idx]))

    regions = read_regions(lr_test.in_path('test_merge_regions.bed'))

    # Golden file created using BEDTools' slopBed and mergeBed.
    check_merge(1000, 'test_merge_regions_d1000.bed')
    check_merge(0, 'test_merge_regions_d0.bed')
def setUp(self):
    """Create the barcode map, uniform frequency arrays, and four analyzers.

    Analyzers are built with and without a target regions file, each with
    extend=0 and extend=100; all of them share the same barcode map and
    frequency arrays.
    """
    # Position in this list is the index assigned to the barcode.
    # NOTE(review): '2-TCCCGTTCCTGGAT' appears at positions 5 and 16, so the
    # later index wins: the map ends up with 19 keys, no barcode maps to 5,
    # and the frequency arrays have length 19 although the largest index
    # is 19. Behaviour is preserved here; confirm the intended barcode.
    barcodes = [
        '4-GCAGTTAGAGAAAT',
        '1-GCTCCTGTATGGCG',
        '1-GATGAAGTACTGAA',
        '8-ACTTTCGTTAATCT',
        '8-GGGTAGTCAGTAAG',
        '2-TCCCGTTCCTGGAT',
        '6-CGTCAATCTTGGCA',
        '3-TGTCGAGTCCGCTG',
        '6-GCGAAGTCCCTAAG',
        '3-TCAGTGGTCCAATC',
        '2-CAGAAAGTCTTGCA',
        '6-ATGCGTAGTTTCTA',
        '6-ACTCAGCAGACATA',
        '1-GGGACATCTCCACC',
        '2-TCCTTATCCTGGAT',
        '3-GTCGTAAGTGACAT',
        '2-TCCCGTTCCTGGAT',
        '1-CATTCTCATCGTCA',
        '8-GTTCTTTCTTCGAG',
        '5-CGTCAAGTTAGACA',
    ]
    bc_map = {bc: idx for idx, bc in enumerate(barcodes)}

    # Every barcode gets the same frequency (0.01) in both arrays.
    n_barcodes = len(bc_map)
    bc_freq = np.ones((n_barcodes, )) * 0.01
    read_freq = np.ones((n_barcodes, )) * 0.01

    self.targets = lr_test.in_path('test_breakpoint_analyzer_targets.bed')

    def make_analyzer(regions_file, extend):
        # All four fixtures differ only in the regions file and extension.
        return BreakpointAnalyzer(bam_filename, bc_freq, bc_map,
                                  read_freq=read_freq,
                                  regions_file=regions_file,
                                  extend=extend)

    self.target_analyzer = make_analyzer(self.targets, 0)
    self.target_analyzer_100 = make_analyzer(self.targets, 100)
    self.analyzer = make_analyzer(None, 0)
    self.analyzer_100 = make_analyzer(None, 100)
def test_merge_multiple_breaks(self):
    """Check merge_multiple_breaks against the golden merge_breaks outputs.

    First the same file is supplied twice, then a pair of separate files is
    merged with two different windows. Results are compared through
    compare_dfs_without_names because the call names might not match.
    """
    whole_path = lr_test.in_path('test_merge_breaks.bedpe')
    merged_path = lr_test.out_path('test_merge_breaks_out.bedpe')

    # The same input given twice, compared with the win50 golden file.
    merge_multiple_breaks([whole_path, whole_path], merged_path, merge_win=50)
    self.compare_dfs_without_names(
        merged_path, lr_test.in_path('test_merge_breaks_win50.bedpe'))

    part1_path = lr_test.in_path('test_merge_breaks1.bedpe')
    part2_path = lr_test.in_path('test_merge_breaks2.bedpe')

    # Two separate inputs, checked for each merge window in turn.
    golden_by_window = (
        (50, 'test_merge_breaks_win50.bedpe'),
        (500, 'test_merge_breaks_win500.bedpe'),
    )
    for window, golden_name in golden_by_window:
        merge_multiple_breaks([part1_path, part2_path], merged_path,
                              merge_win=window)
        self.compare_dfs_without_names(merged_path,
                                       lr_test.in_path(golden_name))
def test_compare_multiple_breaks(self):
    """Check the per-input columns produced by compare_multiple_breaks.

    Three BEDPE fixtures are compared under the labels 0, 1 and 2. After
    sorting (qual descending, then coordinates ascending) the filtered and
    correct flags must all be False, and the per-input qual and dist
    columns must equal the pinned lists.
    """
    fixture_names = ['test_merge_breaks.bedpe',
                     'test_merge_breaks1.bedpe',
                     'test_merge_breaks2.bedpe']
    bedpe_paths = [lr_test.in_path(name) for name in fixture_names]
    result_df = compare_multiple_breaks(
        bedpe_paths, [0, 1, 2],
        lr_test.out_path('test_compare_breaks_out.bedpe'))

    # NOTE(review): DataFrame.sort is the legacy pandas API (superseded by
    # sort_values in later releases); kept as-is to match the pandas
    # version this code was written against.
    sort_keys = ['qual', 'chrom1', 'start1', 'stop1',
                 'chrom2', 'start2', 'stop2']
    directions = [0] + [1] * (len(sort_keys) - 1)
    result_df = result_df.sort(sort_keys, ascending=directions)

    # Element-wise comparison with False is intentional: the columns are
    # array-like, so "is False" / "not" would not test each row.
    flag_columns = ('0_filtered', '1_filtered', '2_filtered',
                    '0_correct', '1_correct', '2_correct')
    for column in flag_columns:
        assert np.all(result_df[column] == False)

    expected_values = (
        ('0_qual', [50, 20, 20, 10, 10]),
        ('1_qual', [0, 20, 0, 10, 10]),
        ('2_qual', [50, 0, 20, 5, 0]),
        ('0_dist', [-1, -1, 100, -1, -1]),
        ('1_dist', [0, -1, 0, -1, -1]),
        ('2_dist', [-1, 0, 100, -1, 0]),
    )
    for column, expected in expected_values:
        self.assertEqual(list(result_df[column]), expected)
#!/usr/bin/env python
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#
import os
import os.path
import longranger.test as lr_test
import tenkit.bio_io as tk_io
from longranger.sv.phase_utils import *

# Directory holding the SV phasing fixtures used by these tests.
TEST_FILE_DIR = lr_test.in_path('sv_phasing')


class TestSvPhaseUtils(lr_test.UnitTestBase):
    """Tests for the helpers in longranger.sv.phase_utils."""

    def setUp(self):
        # Intentionally empty; kept so that any setUp defined on the base
        # class stays overridden exactly as before.
        pass

    def test_select_best_hap(self):
        """Check the haplotype calls select_best_hap returns for the fixture.

        The returned frame is indexed by two levels; the first is the SV
        identifier used in the lookups below.
        """
        test_sv_phasing_file = os.path.join(TEST_FILE_DIR,
                                            'test_sv_phasing.tsv')

        # Default call: SV 92 is absent and SV 1195 keeps a single row.
        sv_phasing_df = select_best_hap(test_sv_phasing_file)
        self.assertEqual(sv_phasing_df.loc[1010, 1].called_hap, '1')
        self.assertEqual(sv_phasing_df.loc[1010, 2].called_hap, '1')
        self.assertEqual(sv_phasing_df.loc[1195, 2].called_hap, '0')
        self.assertEqual(len(sv_phasing_df.loc[1195]), 1)
        # assertNotIn states the membership check directly and shows the
        # index contents on failure, unlike assertEqual(not x in y, True).
        self.assertNotIn(92, sv_phasing_df.index.levels[0])
        self.assertEqual(sv_phasing_df.loc[2034, 1].called_hap, '0')
        self.assertEqual(sv_phasing_df.loc[2034, 2].called_hap, '1')

        # With the second positional argument set to True, SV 92 is present
        # and its called haplotype is '.'.
        sv_phasing_df = select_best_hap(test_sv_phasing_file, True)
        self.assertEqual(sv_phasing_df.loc[92, 2].called_hap, '.')
#!/usr/bin/env python # # Copyright (c) 2014 10X Genomics, Inc. All rights reserved. # import tenkit.bam as tk_bam import longranger.test as lr_test from longranger.sv.breakpoint_analyzer import * bam_filename = lr_test.in_path('test_count_reads.bam') class TestBreakpointAnalyzer(lr_test.UnitTestBase): def setUp(self): bc_map = {} bc_map['4-GCAGTTAGAGAAAT'] = 0 bc_map['1-GCTCCTGTATGGCG'] = 1 bc_map['1-GATGAAGTACTGAA'] = 2 bc_map['8-ACTTTCGTTAATCT'] = 3 bc_map['8-GGGTAGTCAGTAAG'] = 4 bc_map['2-TCCCGTTCCTGGAT'] = 5 bc_map['6-CGTCAATCTTGGCA'] = 6 bc_map['3-TGTCGAGTCCGCTG'] = 7 bc_map['6-GCGAAGTCCCTAAG'] = 8 bc_map['3-TCAGTGGTCCAATC'] = 9 bc_map['2-CAGAAAGTCTTGCA'] = 10 bc_map['6-ATGCGTAGTTTCTA'] = 11 bc_map['6-ACTCAGCAGACATA'] = 12 bc_map['1-GGGACATCTCCACC'] = 13 bc_map['2-TCCTTATCCTGGAT'] = 14 bc_map['3-GTCGTAAGTGACAT'] = 15