Exemplo n.º 1
0
    def test_compare_breaks(self):
        """Check compare_breaks against the test_compare_breaks BEDPE fixtures.

        Four calls are exercised: predictions only, predictions vs. truth
        with max_dist = 0, with max_dist = 500, and with max_dist = 500 plus
        a window_loci argument.  For each call the sizes and contents of the
        three returned collections are asserted.
        """
        truth_path = lr_test.in_path('test_compare_breaks_true.bedpe')
        calls_path = lr_test.in_path('test_compare_breaks_pred.bedpe')

        # Predictions only: all three results come back empty.
        by_pred, by_true, filtered = compare_breaks(calls_path)
        assert len(by_pred) == 0
        assert len(by_true) == 0
        assert len(filtered) == 0

        # max_dist = 0: a single pred/true pair is reported.
        by_pred, by_true, filtered = compare_breaks(
            calls_path, truth_path, max_dist=0)
        assert len(filtered) == 0
        assert len(by_true) == 1
        assert by_true['true_1'] == {'pred_3'}
        assert len(by_pred) == 1
        assert by_pred['pred_3'] == {'true_1'}

        # max_dist = 500: two truth entries and three predictions are paired.
        by_pred, by_true, filtered = compare_breaks(
            calls_path, truth_path, max_dist=500)
        assert len(filtered) == 0
        assert len(by_true) == 2
        assert by_true['true_1'] == {'pred_3', 'pred_4'}
        assert by_true['true_2'] == {'pred_6'}
        assert len(by_pred) == 3
        assert by_pred['pred_6'] == {'true_2'}
        assert by_pred['pred_3'] == {'true_1'}
        assert by_pred['pred_4'] == {'true_1'}

        # Same distance with window_loci given: one entry is reported as
        # filtered and only the true_1 pairings remain.
        by_pred, by_true, filtered = compare_breaks(
            calls_path, truth_path,
            max_dist=500, window_loci=[('chr4', [1000], [3000])])
        assert len(filtered) == 1
        assert len(by_true) == 1
        assert by_true['true_1'] == {'pred_3', 'pred_4'}
        assert len(by_pred) == 2
        assert by_pred['pred_3'] == {'true_1'}
        assert by_pred['pred_4'] == {'true_1'}
Exemplo n.º 2
0
    def test_merge_breaks(self):
        """Check merge_breaks output files and returned frames.

        The written BEDPE is compared with stored fixtures for merge_win of
        50 and 500, must contain no non-comment lines when max_nmates = 0,
        and the returned frame's 'name' / 'info' columns are checked when
        max_range = 1000 is supplied.
        """
        src_bedpe = lr_test.in_path('test_merge_breaks.bedpe')
        dst_bedpe = lr_test.out_path('test_merge_breaks_out.bedpe')

        # (merge_win, fixture holding the expected output file)
        file_cases = (
            (50, 'test_merge_breaks_win50.bedpe'),
            (500, 'test_merge_breaks_win500.bedpe'),
        )
        for win, expected_name in file_cases:
            merge_breaks(src_bedpe, dst_bedpe, merge_win=win)
            self.assert_files_same(dst_bedpe, lr_test.in_path(expected_name), False)

        # With max_nmates = 0 only '#'-prefixed lines may remain in the output.
        merge_breaks(src_bedpe, dst_bedpe, merge_win=500, max_nmates=0)
        with open(dst_bedpe, 'r') as handle:
            records = [row for row in handle if not row.startswith('#')]
        assert len(records) == 0

        merged = merge_breaks(src_bedpe, dst_bedpe, merge_win=500, max_range=1000)
        self.assertEqual(set(merged['name']), {'F', 'B', 'D'})
        merged = merge_breaks(src_bedpe, dst_bedpe, merge_win=50, max_range=1000)
        self.assertEqual(set(merged['name']), {'F', 'B', 'D', 'A', 'C'})

        # Chain fixture: both the order of the names and the info strings
        # are checked.
        chain_bedpe = lr_test.in_path('test_merge_breaks_chain.bedpe')
        merged = merge_breaks(chain_bedpe, dst_bedpe,
                              merge_win=1000, max_range=1000)
        self.assertEqual(list(merged['name']), ['B', 'D'])
        self.assertEqual(list(merged['info']),
                         ['NMATES1=2;NMATES2=1', 'NMATES1=2;NMATES2=1'])
Exemplo n.º 3
0
    def test_sort_and_merge(self):
        """Compare sort_and_merge results with stored BED fixtures.

        Regions read from test_merge_regions.bed are passed to
        sort_and_merge with a second argument of 1000 and of 0.  Every line
        of the matching expected file must equal the region at the same
        position in the result.
        """
        def parse_region(bed_line):
            # Use the first three whitespace-separated columns; the second
            # and third are converted to int.
            cols = bed_line.strip().split()
            return (cols[0], int(cols[1]), int(cols[2]))

        with open(lr_test.in_path('test_merge_regions.bed'), 'r') as src:
            regions = [parse_region(row) for row in src]

        # (second argument to sort_and_merge, fixture with expected regions)
        # The d1000 fixture was created using BedTools' slopBed and mergeBed.
        cases = (
            (1000, 'test_merge_regions_d1000.bed'),
            (0, 'test_merge_regions_d0.bed'),
        )
        for dist, expected_name in cases:
            out_regions = sort_and_merge(regions, dist)
            with open(lr_test.in_path(expected_name), 'r') as expected:
                for idx, row in enumerate(expected):
                    assert parse_region(row) == out_regions[idx]
Exemplo n.º 4
0
    def setUp(self):
        """Create the barcode map and the four BreakpointAnalyzer fixtures.

        Analyzers are built with and without the targets BED file, each
        with extend = 0 and extend = 100.
        """
        # A barcode's index in the map is its position in this list.
        # NOTE(review): '2-TCCCGTTCCTGGAT' appears twice (positions 5 and
        # 16).  The later position wins, so the map holds 19 keys, no key
        # maps to 5, and the largest index (19) equals the map's length.
        # Kept exactly as in the original fixture -- confirm whether this
        # duplicate is intended.
        barcodes = [
            '4-GCAGTTAGAGAAAT',
            '1-GCTCCTGTATGGCG',
            '1-GATGAAGTACTGAA',
            '8-ACTTTCGTTAATCT',
            '8-GGGTAGTCAGTAAG',
            '2-TCCCGTTCCTGGAT',
            '6-CGTCAATCTTGGCA',
            '3-TGTCGAGTCCGCTG',
            '6-GCGAAGTCCCTAAG',
            '3-TCAGTGGTCCAATC',
            '2-CAGAAAGTCTTGCA',
            '6-ATGCGTAGTTTCTA',
            '6-ACTCAGCAGACATA',
            '1-GGGACATCTCCACC',
            '2-TCCTTATCCTGGAT',
            '3-GTCGTAAGTGACAT',
            '2-TCCCGTTCCTGGAT',
            '1-CATTCTCATCGTCA',
            '8-GTTCTTTCTTCGAG',
            '5-CGTCAAGTTAGACA',
        ]
        bc_map = {}
        for position, barcode in enumerate(barcodes):
            bc_map[barcode] = position

        # One entry per distinct barcode, every entry equal to 0.01.
        bc_freq = np.ones((len(bc_map), )) * 0.01
        read_freq = np.ones((len(bc_map), )) * 0.01
        self.targets = lr_test.in_path('test_breakpoint_analyzer_targets.bed')

        def build(regions_file, extend):
            # All four analyzers share the BAM, frequencies and barcode map.
            return BreakpointAnalyzer(bam_filename,
                                      bc_freq,
                                      bc_map,
                                      read_freq=read_freq,
                                      regions_file=regions_file,
                                      extend=extend)

        self.target_analyzer = build(self.targets, 0)
        self.target_analyzer_100 = build(self.targets, 100)
        self.analyzer = build(None, 0)
        self.analyzer_100 = build(None, 100)
Exemplo n.º 5
0
    def test_merge_multiple_breaks(self):
        """Check merge_multiple_breaks against the single-file merge fixtures.

        Merging the same file with itself, and merging the two files
        test_merge_breaks1/2.bedpe, must give the stored win50 / win500
        results once names are left out of the comparison.
        """
        whole_bedpe = lr_test.in_path('test_merge_breaks.bedpe')
        merged_out = lr_test.out_path('test_merge_breaks_out.bedpe')

        merge_multiple_breaks([whole_bedpe, whole_bedpe], merged_out, merge_win=50)
        # Names are excluded from the comparison because they might not match.
        self.compare_dfs_without_names(
            merged_out, lr_test.in_path('test_merge_breaks_win50.bedpe'))

        part_bedpes = [
            lr_test.in_path('test_merge_breaks1.bedpe'),
            lr_test.in_path('test_merge_breaks2.bedpe'),
        ]
        # (merge_win, fixture holding the expected merged output)
        cases = (
            (50, 'test_merge_breaks_win50.bedpe'),
            (500, 'test_merge_breaks_win500.bedpe'),
        )
        for win, expected_name in cases:
            merge_multiple_breaks(list(part_bedpes), merged_out, merge_win=win)
            self.compare_dfs_without_names(merged_out, lr_test.in_path(expected_name))
Exemplo n.º 6
0
 def test_compare_multiple_breaks(self):
     """Check the merged frame returned by compare_multiple_breaks.

     Three BEDPE fixtures are compared under the labels 0, 1 and 2.  After
     sorting by descending 'qual' and ascending coordinates, the
     per-label '<label>_filtered', '<label>_correct', '<label>_qual' and
     '<label>_dist' columns are asserted.
     """
     filenames = ['test_merge_breaks.bedpe', 'test_merge_breaks1.bedpe',
                  'test_merge_breaks2.bedpe']
     in_bedpes = [lr_test.in_path(s) for s in filenames]
     merged_df = compare_multiple_breaks(in_bedpes, [0, 1, 2],
         lr_test.out_path('test_compare_breaks_out.bedpe'))

     sort_cols = ['qual', 'chrom1', 'start1', 'stop1', 'chrom2', 'start2', 'stop2']
     # 'qual' descending, all coordinate columns ascending.
     sort_dirs = [False, True, True, True, True, True, True]
     # DataFrame.sort was removed in pandas 0.20; sort_values is its
     # replacement.  Fall back to the legacy method so the test still runs
     # on pandas versions that predate sort_values.
     if hasattr(merged_df, 'sort_values'):
         merged_df = merged_df.sort_values(sort_cols, ascending=sort_dirs)
     else:
         merged_df = merged_df.sort(sort_cols, ascending=sort_dirs)

     # '== False' is an element-wise comparison on the column, so it must
     # not be rewritten as 'is False' or 'not ...'.
     for label in ('0', '1', '2'):
         assert np.all(merged_df[label + '_filtered'] == False)
     for label in ('0', '1', '2'):
         assert np.all(merged_df[label + '_correct'] == False)

     self.assertEqual(list(merged_df['0_qual']), [50, 20, 20, 10, 10])
     self.assertEqual(list(merged_df['1_qual']), [0, 20, 0, 10, 10])
     self.assertEqual(list(merged_df['2_qual']), [50, 0, 20, 5, 0])
     self.assertEqual(list(merged_df['0_dist']), [-1, -1, 100, -1, -1])
     self.assertEqual(list(merged_df['1_dist']), [0, -1, 0, -1, -1])
     self.assertEqual(list(merged_df['2_dist']), [-1, 0, 100, -1, 0])
Exemplo n.º 7
0
#!/usr/bin/env python
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

import os
import os.path
import longranger.test as lr_test
import tenkit.bio_io as tk_io
from longranger.sv.phase_utils import *

# Directory holding the SV-phasing test inputs, resolved through
# lr_test.in_path; test files are joined onto it with os.path.join.
TEST_FILE_DIR = lr_test.in_path('sv_phasing')


class TestSvPhaseUtils(lr_test.UnitTestBase):
    """Tests for select_best_hap using the test_sv_phasing.tsv fixture."""

    def setUp(self):
        # No per-test fixtures are needed; the explicit no-op override is
        # kept so that this class's setUp behaviour stays unchanged.
        pass

    def test_select_best_hap(self):
        """Check the called_hap values in the frame from select_best_hap.

        The frame is indexed by a pair of integers.  With the default
        arguments, first-level key 92 must be absent; when True is passed
        as the second argument, entry (92, 2) is present with called_hap
        '.'.
        """
        phasing_path = os.path.join(TEST_FILE_DIR, 'test_sv_phasing.tsv')

        best = select_best_hap(phasing_path)

        for key, expected_hap in (((1010, 1), '1'),
                                  ((1010, 2), '1'),
                                  ((1195, 2), '0')):
            self.assertEqual(best.loc[key].called_hap, expected_hap)

        # Only one row is kept under first-level key 1195.
        self.assertEqual(len(best.loc[1195]), 1)
        # Key 92 does not appear in the first index level by default.
        self.assertTrue(92 not in best.index.levels[0])

        for key, expected_hap in (((2034, 1), '0'),
                                  ((2034, 2), '1')):
            self.assertEqual(best.loc[key].called_hap, expected_hap)

        best = select_best_hap(phasing_path, True)
        self.assertEqual(best.loc[92, 2].called_hap, '.')
Exemplo n.º 8
0
#!/usr/bin/env python
#
# Copyright (c) 2014 10X Genomics, Inc. All rights reserved.
#

import tenkit.bam as tk_bam
import longranger.test as lr_test
from longranger.sv.breakpoint_analyzer import *

# Path of the BAM test input, resolved through lr_test.in_path.
bam_filename = lr_test.in_path('test_count_reads.bam')


class TestBreakpointAnalyzer(lr_test.UnitTestBase):
    def setUp(self):
        bc_map = {}
        bc_map['4-GCAGTTAGAGAAAT'] = 0
        bc_map['1-GCTCCTGTATGGCG'] = 1
        bc_map['1-GATGAAGTACTGAA'] = 2
        bc_map['8-ACTTTCGTTAATCT'] = 3
        bc_map['8-GGGTAGTCAGTAAG'] = 4
        bc_map['2-TCCCGTTCCTGGAT'] = 5
        bc_map['6-CGTCAATCTTGGCA'] = 6
        bc_map['3-TGTCGAGTCCGCTG'] = 7
        bc_map['6-GCGAAGTCCCTAAG'] = 8
        bc_map['3-TCAGTGGTCCAATC'] = 9
        bc_map['2-CAGAAAGTCTTGCA'] = 10
        bc_map['6-ATGCGTAGTTTCTA'] = 11
        bc_map['6-ACTCAGCAGACATA'] = 12
        bc_map['1-GGGACATCTCCACC'] = 13
        bc_map['2-TCCTTATCCTGGAT'] = 14
        bc_map['3-GTCGTAAGTGACAT'] = 15