def test_03(self):
    """
    Duplicate removal, simplest case: two fusions with identical
    breakpoints and strands must collapse into one fusion.

    After de-duplication the surviving fusion must carry exactly one
    annotated gene on each side.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # Two independent Fusion objects built from the very same coordinates.
    duplicates = [
        Fusion("chr1", "chr2", 15000, 20000, "+", "+", "Experiment", "", True)
        for _ in range(2)
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    for duplicate in duplicates:
        experiment.add_fusion(duplicate)
    self.assertEqual(len(experiment), 2)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_03.bed", "hg18", 200000)
    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)
    for survivor in experiment:
        self.assertEqual(len(survivor.annotated_genes_left), 1)
        self.assertEqual(len(survivor.annotated_genes_right), 1)
def test_08(self):
    """
    Duplicate removal with two fusions of which one is missing
    annotations: that one should be dropped because it is not gene
    spanning, leaving a single fusion with one gene on each side.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # Same left break; the right breaks differ (20000 vs 30000).
    candidates = [
        Fusion("chr1", "chr2", 15000, right_break, "+", "+", "Experiment", "", True)
        for right_break in (20000, 30000)
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    for candidate in candidates:
        experiment.add_fusion(candidate)
    self.assertEqual(len(experiment), 2)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_08.bed", "hg18", 200000)
    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)
    for survivor in experiment:
        self.assertEqual(len(survivor.annotated_genes_left), 1)
        self.assertEqual(len(survivor.annotated_genes_right), 1)
def test_05(self):
    """
    Duplicate removal with subset annotations.

    One fusion has two overlapping left genes and a single right gene,
    the other has it the other way around. The single gene is always a
    subset of the other fusion's two genes, so both fusions must be
    treated as having identical annotations and be merged.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # Annotations noted by the original author: (left genes):(right genes)
    coordinates = (
        (15000, 20050),  # (1A):(2A,2B)
        (15050, 20000),  # (1A,1B):(2A)
    )
    candidates = [
        Fusion("chr1", "chr2", left_break, right_break, "+", "+", "Experiment", "", True)
        for left_break, right_break in coordinates
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    for candidate in candidates:
        experiment.add_fusion(candidate)
    self.assertEqual(len(experiment), 2)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_05.bed", "hg18", 200000)
    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)
    for merged in experiment:
        # Only the shared subset is kept on each side: (1A) and (2A).
        self.assertEqual(len(merged.annotated_genes_left), 1)
        self.assertEqual(len(merged.annotated_genes_right), 1)
def test_01(self):
    """
    Acceptor/donor order specific matching.

    #1:
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    #2:
            break1                      break2
              |                           |
        [ --- Gene B --- ]        [ --- Gene A --- ]

    Both fusions join the same two genes but in opposite order. They
    must match only when order specific matching is switched off.
    """
    args_on = CLI(['--acceptor-donor-order-specific-matching', '-f', 'summary', '-s', ''])
    args_off = CLI(['-f', 'summary', '-s', ''])

    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    def annotated_experiment(name, fusion):
        # Wrap a single fusion in its own experiment and annotate it.
        experiment = FusionDetectionExperiment(name)
        experiment.add_fusion(fusion)
        experiment.annotate_genes(genes)
        return experiment

    def overlay(args):
        # Fresh overlap complex of both experiments under the given settings.
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(experiment_1)
        overlapping_complex.add_experiment(experiment_2)
        return overlapping_complex.overlay_fusions(True, False, args)

    fusion_1 = Fusion("chr1", "chr1", 15000, 85000, "+", "+", "Experiment_1", "1", True)
    experiment_1 = annotated_experiment("Experiment_1", fusion_1)

    fusion_2 = Fusion("chr1", "chr1", 85000, 15000, "+", "+", "Experiment_2", "2", True)
    experiment_2 = annotated_experiment("Experiment_2", fusion_2)

    self.assertEqual(len(fusion_1.annotated_genes_left), 1)
    self.assertEqual(len(fusion_2.annotated_genes_left), 1)

    self.assertEqual(fusion_1.acceptor_donor_direction, AD_DIRECTION_FORWARD)
    self.assertEqual(fusion_2.acceptor_donor_direction, AD_DIRECTION_REVERSE)

    # Order specific matching: opposite directions may not match.
    overlap = overlay(args_on)
    self.assertEqual(len(overlap[0]), 0)

    # Order agnostic matching: the same gene pair matches once.
    overlap = overlay(args_off)
    self.assertEqual(len(overlap[0]), 1)
def test_01(self):
    """
    A freshly constructed fusion (with one location attached) exposes
    the break positions and strands it was created with.
    """
    fusion = Fusion("chr1", "chrX", 15000, 15000, None, None, "-", "+", "Experiment_1")

    left_side = [fusion.get_left_chromosome(), fusion.get_left_break_position()]
    right_side = [fusion.get_right_chromosome(), fusion.get_right_break_position()]
    fusion.add_location({
        'left': left_side,
        'right': right_side,
        'id': 1,
        'dataset': fusion.dataset_name,
    })

    self.assertEqual(fusion.left_break_position, 15000)
    self.assertEqual(fusion.right_break_position, 15000)

    self.assertEqual(fusion.left_strand, STRAND_REVERSE)
    self.assertEqual(fusion.right_strand, STRAND_FORWARD)
def test_01(self):
    """
    Acceptor/donor order specific matching.

    #1:
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    #2:
            break1                      break2
              |                           |
        [ --- Gene B --- ]        [ --- Gene A --- ]

    Both fusions join the same two genes but in opposite order. They
    must match only when order specific matching is switched off.
    """
    args_on = CLI(['--acceptor-donor-order-specific-matching', '-f', 'summary', '-s', ''])
    args_off = CLI(['-f', 'summary', '-s', ''])

    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    def attach_location(fusion, location_id):
        # Register the fusion's own breakpoints as its (single) location.
        fusion.add_location({
            'left': [fusion.get_left_chromosome(), fusion.get_left_break_position()],
            'right': [fusion.get_right_chromosome(), fusion.get_right_break_position()],
            'id': location_id,
            'dataset': fusion.dataset_name,
        })

    def annotated_experiment(name, fusion):
        # Wrap a single fusion in its own experiment and annotate it.
        experiment = FusionDetectionExperiment(name)
        experiment.add_fusion(fusion)
        experiment.annotate_genes(genes)
        return experiment

    def overlay(args):
        # Fresh overlap complex of both experiments under the given settings.
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(experiment_1)
        overlapping_complex.add_experiment(experiment_2)
        return overlapping_complex.overlay_fusions(True, False, args)

    fusion_1 = Fusion("chr1", "chr1", 15000, 85000, None, None, "+", "+", "Experiment_1")
    attach_location(fusion_1, 1)
    experiment_1 = annotated_experiment("Experiment_1", fusion_1)

    fusion_2 = Fusion("chr1", "chr1", 85000, 15000, None, None, "+", "+", "Experiment_2")
    attach_location(fusion_2, 2)
    experiment_2 = annotated_experiment("Experiment_2", fusion_2)

    self.assertEqual(len(fusion_1.annotated_genes_left), 1)
    self.assertEqual(len(fusion_2.annotated_genes_left), 1)

    self.assertEqual(fusion_1.acceptor_donor_direction, AD_DIRECTION_FORWARD)
    self.assertEqual(fusion_2.acceptor_donor_direction, AD_DIRECTION_REVERSE)

    # Order specific matching: opposite directions may not match.
    overlap = overlay(args_on)
    self.assertEqual(len(overlap[0]), 0)

    # Order agnostic matching: the same gene pair matches once.
    overlap = overlay(args_off)
    self.assertEqual(len(overlap[0]), 1)
def test_01(self):
    """
    A freshly constructed fusion exposes the break positions and
    strands it was created with ("-" / "+" map to the reverse / forward
    strand constants).
    """
    fusion = Fusion("chr1", "chrX", 15000, 15000, "-", "+", "Experiment_1", "1", True)

    self.assertEqual(fusion.left_break_position, 15000)
    self.assertEqual(fusion.right_break_position, 15000)

    self.assertEqual(fusion.left_strand, STRAND_REVERSE)
    self.assertEqual(fusion.right_strand, STRAND_FORWARD)
def test_12(self):
    """
    Duplicate removal over eight fusions that fall into four groups.

    Groups of fusions expected to be merged (per the original notes;
    each group sits on its own chromosome in the code below):

        {a} = f1, f4, f5   (chr1)
        {b} = f2, f6       (chr2)
        {c} = f7, f8       (chr3)
        {d} = f3           (chr4, no partner)

    The fusions are added in a shuffled order, and after de-duplication
    exactly four fusions (one per group) must remain.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # (chromosome, left break, right break) for fusion 1..8, in that order.
    coordinates = [
        ("chr1", 15010, 80040),  # f1
        ("chr2", 15030, 80030),  # f2
        ("chr4", 15050, 80070),  # f3
        ("chr1", 15060, 80010),  # f4
        ("chr1", 15020, 80050),  # f5
        ("chr2", 15080, 80080),  # f6
        ("chr3", 15040, 80020),  # f7
        ("chr3", 15070, 80060),  # f8
    ]
    fusions = [
        Fusion(chromosome, chromosome, left_break, right_break, "+", "+", "Experiment", "", True)
        for chromosome, left_break, right_break in coordinates
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    # Deliberately not in numeric order.
    for number in (2, 8, 6, 4, 5, 3, 1, 7):
        experiment.add_fusion(fusions[number - 1])
    self.assertEqual(len(experiment), 8)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_12.bed", "hg18", 200000)
    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 4)
def test_11(self):
    """
    Two fusions whose right-hand gene sets only partly overlap may not
    be merged:

        {1} = a,b,c
        {2} = a,b,d

    Schematic (gene coordinates live in the BED file):

        Break {1}:                        |
        Break {2}:                  |     :
        a:             [            :     :    ]
        b:                 [        :     :       ]
        c:                          :  [  :          ]
        d:        [                 :  ]  :

    Fusion 1 (right break 27500) is expected to hit a, b and c; fusion 2
    (right break 25500) is expected to hit a, b and d. Neither set is a
    subset of the other, so both fusions must survive de-duplication.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    fusion_1 = Fusion("chr1", "chr2", 15000, 27500, "+", "+", "Experiment", "", True)
    fusion_2 = Fusion("chr1", "chr2", 15000, 25500, "+", "+", "Experiment", "", True)

    experiment = FusionDetectionExperiment("Experiment_1")
    experiment.add_fusion(fusion_1)
    experiment.add_fusion(fusion_2)

    self.assertEqual(len(experiment), 2)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_11.bed", "hg18", 200000)

    experiment.annotate_genes(genes)

    # Expected right-hand genes keyed by the fusion's right break position.
    # The previous version compared against 30000 and 25000, which match
    # neither fusion above, so these annotation checks silently never ran.
    expected_right_genes = {
        27500: ("NM_00002A", "NM_00002B", "NM_00002C"),
        25500: ("NM_00002A", "NM_00002B", "NM_00002D"),
    }

    checked_positions = set()
    for fusion in experiment:
        position = fusion.get_right_break_position()
        self.assertIn(position, expected_right_genes)

        gene_names = [str(gene_name) for gene_name in fusion.annotated_genes_right]
        self.assertEqual(len(fusion.annotated_genes_right), 3)
        for expected_name in expected_right_genes[position]:
            self.assertIn(expected_name, gene_names)

        checked_positions.add(position)

    # Guard against the loop skipping a fusion: both must have been verified.
    self.assertEqual(len(checked_positions), 2)

    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 2)
def test_04(self):
    """
    Duplicate removal of two matching fusions, checking preservation of
    the gene names that originate from different annotations.

    Both fusions are annotated by hand with equally named but distinct
    Gene objects; after merging, the surviving fusion must list both
    left genes.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # Equally named genes, one object per (pretend) genome build.
    left_gene_hg18 = Gene("gene_1", False)
    left_gene_hg19 = Gene("gene_1", False)
    right_gene_hg18 = Gene("gene_2", False)
    right_gene_hg19 = Gene("gene_2", False)

    fusion_hg18 = Fusion("chr1", "chr2", 15000, 20000, "+", "+", "Experiment", "", True)
    fusion_hg19 = Fusion("chr1", "chr2", 15500, 20500, "+", "+", "Experiment", "", True)

    fusion_hg18.annotate_genes_left([left_gene_hg18])
    fusion_hg19.annotate_genes_left([left_gene_hg19])
    fusion_hg18.annotate_genes_right([right_gene_hg18])
    fusion_hg19.annotate_genes_right([right_gene_hg19])

    experiment = FusionDetectionExperiment("Experiment_1")
    # No annotate_genes() call is made here, so these attributes are set
    # by hand (presumably what annotate_genes() would otherwise populate).
    experiment.genes_spanning_left_junction = [True]
    experiment.genes_spanning_right_junction = [True]

    for fusion in (fusion_hg18, fusion_hg19):
        experiment.add_fusion(fusion)
    self.assertEqual(len(experiment), 2)

    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)
    for merged in experiment:
        self.assertEqual(len(merged.annotated_genes_left), 2)
def test_04(self):
    """
    #1:
              -->                         <--
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    #2:
              <--                         -->
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    Ensure the strand of the merged fusion is not set when the merged
    fusions disagree on strand, and is set when they agree.
    """
    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    fusion_1 = Fusion("chr1", "chr1", 15000, 85000, "+", "-", "Experiment_1", 3, True)
    fusion_2 = Fusion("chr1", "chr1", 15000, 85000, "-", "+", "Experiment_2", 4, True)

    experiment_1 = FusionDetectionExperiment("Experiment_1")
    experiment_1.add_fusion(fusion_1)
    experiment_1.annotate_genes(genes)

    experiment_2 = FusionDetectionExperiment("Experiment_2")
    experiment_2.add_fusion(fusion_2)
    experiment_2.annotate_genes(genes)

    def overlay(first, second):
        # Overlay two experiments without strand specific matching.
        args = CLI(['-f', 'summary', '--no-strand-specific-matching', '-s', ''])
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(first)
        overlapping_complex.add_experiment(second)
        return overlapping_complex.overlay_fusions(True, False, args)

    # Conflicting strands: the merged fusion must leave both unset.
    overlap = overlay(experiment_1, experiment_2)
    self.assertEqual(overlap[0][0].left_strand, None)
    self.assertEqual(overlap[0][0].right_strand, None)

    # An experiment overlaid with itself agrees on strand: both stay set.
    for experiment in (experiment_1, experiment_2):
        overlap = overlay(experiment, experiment)
        self.assertNotEqual(overlap[0][0].left_strand, None)
        self.assertNotEqual(overlap[0][0].right_strand, None)
def test_03(self):
    """
    Matching matrix over acceptor/donor order and breakpoint strands.

    Gene A spans chr1:10000-20000 and Gene B spans chr1:80000-90000.
    Eight single-fusion experiments are built:

        name   break1 in   break2 in   strand1   strand2
        AB1    Gene A      Gene B        -->       -->
        AB2    Gene A      Gene B        <--       <--
        AB3    Gene A      Gene B        -->       <--
        AB4    Gene A      Gene B        <--       -->
        BA1    Gene B      Gene A        -->       -->
        BA2    Gene B      Gene A        <--       <--
        BA3    Gene B      Gene A        -->       <--
        BA4    Gene B      Gene A        <--       -->

    Every experiment is overlaid with every experiment (itself included)
    under four combinations of strand specific and acceptor/donor order
    specific matching.
    """
    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    # (dataset, left break, right break, left strand, right strand, uid)
    fusion_table = [
        ("Experiment_AB1", 15000, 85000, "+", "+", "1"),
        ("Experiment_AB2", 15000, 85000, "-", "-", "2"),
        ("Experiment_AB3", 15000, 85000, "+", "-", "3"),
        ("Experiment_AB4", 15000, 85000, "-", "+", "4"),
        ("Experiment_BA1", 85000, 15000, "+", "+", "5"),
        ("Experiment_BA2", 85000, 15000, "-", "-", "6"),
        ("Experiment_BA3", 85000, 15000, "+", "-", "7"),
        ("Experiment_BA4", 85000, 15000, "-", "+", "8"),
    ]
    fusions = {}
    for dataset, left_break, right_break, left_strand, right_strand, uid in fusion_table:
        fusions[dataset] = Fusion("chr1", "chr1", left_break, right_break, left_strand, right_strand, dataset, uid, True)

    # The list index within each direction acts as the "strand index".
    # Swap 3 and 4 for BA - to match "-" , "+" and AB <-> BA
    # (presumably because the strands swap sides along with the genes).
    layout = (
        ('AB', ("Experiment_AB1", "Experiment_AB2", "Experiment_AB3", "Experiment_AB4")),
        ('BA', ("Experiment_BA1", "Experiment_BA2", "Experiment_BA4", "Experiment_BA3")),
    )
    experiments = {'AB': [], 'BA': []}
    for direction, datasets in layout:
        for dataset in datasets:
            experiment = FusionDetectionExperiment(dataset)
            experiments[direction].append(experiment)
            experiment.add_fusion(fusions[dataset])
            experiment.annotate_genes(genes)

    def all_pairings():
        # Yield every ordered pair of experiments with direction + strand index.
        for ad_direction_1 in experiments:
            for strand_1, first in enumerate(experiments[ad_direction_1]):
                for ad_direction_2 in experiments:
                    for strand_2, second in enumerate(experiments[ad_direction_2]):
                        yield ad_direction_1, strand_1, first, ad_direction_2, strand_2, second

    def overlay(first, second, args):
        # Overlay exactly two experiments in a fresh OverlapComplex.
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(first)
        overlapping_complex.add_experiment(second)
        return overlapping_complex.overlay_fusions(True, False, args)

    # No strict settings - everything should match with everything.
    # The merged acceptor/donor direction is only kept when both agree.
    args = CLI(['-f', 'summary', '--no-strand-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        self.assertEqual(len(overlap[0]), 1)
        if ad_1 == ad_2:
            self.assertNotEqual(overlap[0][0].acceptor_donor_direction, None)
        else:
            self.assertEqual(overlap[0][0].acceptor_donor_direction, None)

    # Strand specific only - a match requires the same strand index.
    args = CLI(['-f', 'summary', '--strand-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if strand_1 == strand_2:
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)

    # Acceptor/donor order specific only - a match requires the same order.
    args = CLI(['-f', 'summary', '--no-strand-specific-matching', '--acceptor-donor-order-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if ad_1 == ad_2:
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)

    # Both strict settings - a match requires same strand index AND same order.
    args = CLI(['-f', 'summary', '--strand-specific-matching', '--acceptor-donor-order-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if (strand_1 == strand_2) and (ad_1 == ad_2):
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)
def test_13(self):
    """
    Crazy stuff: duplicate removal must not depend on insertion order.

    Three fusions share the left break (chr1:15000) and differ in their
    right break on chr2:

        fusion 1 -> 70000
        fusion 2 -> 80000
        fusion 3 -> 60000

    Original notes on the annotation (the genes themselves come from the
    BED file):

        {a} = 1 , 2        1 = [a,b,c]
        {b} = 1 , 3        2 = [a,c]
                           3 = [a,b]

           | f1 | f2 | f3 |
        ---+----+----+----+
        f1 | *  | 1  | 2  |
        f2 | 1  | *  |    |
        f3 | 2  |    | *  |

    >> Try in all possible orders: six experiments each receive the same
    three fusions in one of the six permutations, and every experiment
    must end up with exactly two fusions.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    right_breaks = {1: 70000, 2: 80000, 3: 60000}

    # (experiment name, order in which fusions 1..3 are added)
    insertion_orders = [
        ("Experiment_1", (1, 2, 3)),
        ("Experiment_2", (1, 3, 2)),
        ("Experiment_3", (2, 1, 3)),
        ("Experiment_4", (2, 3, 1)),
        ("Experiment_5", (3, 1, 2)),
        ("Experiment_6", (3, 2, 1)),
    ]

    # Every experiment gets its own Fusion objects, labelled with its name.
    fusions = {}
    for name, _ in insertion_orders:
        for number in (1, 2, 3):
            fusions[(name, number)] = Fusion("chr1", "chr2", 15000, right_breaks[number], "+", "+", name, "", True)

    experiments = [FusionDetectionExperiment(name) for name, _ in insertion_orders]

    for experiment, (name, order) in zip(experiments, insertion_orders):
        for number in order:
            experiment.add_fusion(fusions[(name, number)])

    for experiment in experiments:
        self.assertEqual(len(experiment), 3)

    genes = ParseBED("tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_13.bed", "hg18", 200000)

    for experiment in experiments:
        experiment.annotate_genes(genes)

    for experiment in experiments:
        experiment.remove_duplicates(args)

    # Removing duplicates (original author's walk-through):
    #
    # order 1:
    #  -> [A,B]   <-> [A,C]   = [A,B] & [A,C] & [A,B,C]
    #  -> [A,B]   <-> [A,B,C] = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 2:
    #  -> [A,B]   <-> [A,B,C] = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 3:
    #  -> [A,C]   <-> [A,B]   = [A,C] & [A,B] & [A,B,C]
    #  -> [A,C]   <-> [A,B,C] = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]
    # order 4:
    #  -> [A,C]   <-> [A,B,C] = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]
    # order 5:
    #  -> [A,B,C] <-> [A,B]   = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 6:
    #  -> [A,B,C] <-> [A,C]   = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]

    for experiment in experiments:
        self.assertEqual(len(experiment), 2)
def test_04(self):
    """
    #1:
              -->                         <--
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    #2:
              <--                         -->
            break1                      break2
              |                           |
        [ --- Gene A --- ]        [ --- Gene B --- ]

    Ensure the strand of the merged fusion is not set when the merged
    fusions disagree on strand, and is set when they agree.
    """
    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    def attach_location(fusion, location_id):
        # Register the fusion's own breakpoints as its (single) location.
        fusion.add_location({
            'left': [fusion.get_left_chromosome(), fusion.get_left_break_position()],
            'right': [fusion.get_right_chromosome(), fusion.get_right_break_position()],
            'id': location_id,
            'dataset': fusion.dataset_name,
        })

    fusion_1 = Fusion("chr1", "chr1", 15000, 85000, None, None, "+", "-", "Experiment_1")
    attach_location(fusion_1, 3)
    fusion_2 = Fusion("chr1", "chr1", 15000, 85000, None, None, "-", "+", "Experiment_2")
    attach_location(fusion_2, 4)

    experiment_1 = FusionDetectionExperiment("Experiment_1")
    experiment_1.add_fusion(fusion_1)
    experiment_1.annotate_genes(genes)

    experiment_2 = FusionDetectionExperiment("Experiment_2")
    experiment_2.add_fusion(fusion_2)
    experiment_2.annotate_genes(genes)

    def overlay(first, second):
        # Overlay two experiments without strand specific matching.
        args = CLI(['-f', 'summary', '--no-strand-specific-matching', '-s', ''])
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(first)
        overlapping_complex.add_experiment(second)
        return overlapping_complex.overlay_fusions(True, False, args)

    # Conflicting strands: the merged fusion must leave both unset.
    overlap = overlay(experiment_1, experiment_2)
    self.assertEqual(overlap[0][0].left_strand, None)
    self.assertEqual(overlap[0][0].right_strand, None)

    # An experiment overlaid with itself agrees on strand: both stay set.
    for experiment in (experiment_1, experiment_2):
        overlap = overlay(experiment, experiment)
        self.assertNotEqual(overlap[0][0].left_strand, None)
        self.assertNotEqual(overlap[0][0].right_strand, None)
def test_03(self):
    """
    Matching matrix over acceptor/donor order and breakpoint strands.

    Gene A spans chr1:10000-20000 and Gene B spans chr1:80000-90000.
    Eight single-fusion experiments are built:

        name   break1 in   break2 in   strand1   strand2
        AB1    Gene A      Gene B        -->       -->
        AB2    Gene A      Gene B        <--       <--
        AB3    Gene A      Gene B        -->       <--
        AB4    Gene A      Gene B        <--       -->
        BA1    Gene B      Gene A        -->       -->
        BA2    Gene B      Gene A        <--       <--
        BA3    Gene B      Gene A        -->       <--
        BA4    Gene B      Gene A        <--       -->

    Every experiment is overlaid with every experiment (itself included)
    under four combinations of strand specific and acceptor/donor order
    specific matching.
    """
    gene_A = Gene("A", False)
    gene_B = Gene("B", False)

    genes = GeneAnnotation("hg19")
    genes.add_annotation(gene_A, "1", 10000, 20000)
    genes.add_annotation(gene_B, "1", 80000, 90000)

    def located_fusion(left_break, right_break, left_strand, right_strand, dataset, location_id):
        # Build a fusion and register its own breakpoints as its location.
        fusion = Fusion("chr1", "chr1", left_break, right_break, None, None, left_strand, right_strand, dataset)
        fusion.add_location({
            'left': [fusion.get_left_chromosome(), fusion.get_left_break_position()],
            'right': [fusion.get_right_chromosome(), fusion.get_right_break_position()],
            'id': location_id,
            'dataset': fusion.dataset_name,
        })
        return fusion

    # (dataset, left break, right break, left strand, right strand, location id)
    fusion_table = [
        ("Experiment_AB1", 15000, 85000, "+", "+", 1),
        ("Experiment_AB2", 15000, 85000, "-", "-", 2),
        ("Experiment_AB3", 15000, 85000, "+", "-", 3),
        ("Experiment_AB4", 15000, 85000, "-", "+", 4),
        ("Experiment_BA1", 85000, 15000, "+", "+", 5),
        ("Experiment_BA2", 85000, 15000, "-", "-", 6),
        ("Experiment_BA3", 85000, 15000, "+", "-", 7),
        ("Experiment_BA4", 85000, 15000, "-", "+", 8),
    ]
    fusions = {}
    for dataset, left_break, right_break, left_strand, right_strand, location_id in fusion_table:
        fusions[dataset] = located_fusion(left_break, right_break, left_strand, right_strand, dataset, location_id)

    # The list index within each direction acts as the "strand index".
    # Swap 3 and 4 for BA - to match "-" , "+" and AB <-> BA
    # (presumably because the strands swap sides along with the genes).
    layout = (
        ('AB', ("Experiment_AB1", "Experiment_AB2", "Experiment_AB3", "Experiment_AB4")),
        ('BA', ("Experiment_BA1", "Experiment_BA2", "Experiment_BA4", "Experiment_BA3")),
    )
    experiments = {'AB': [], 'BA': []}
    for direction, datasets in layout:
        for dataset in datasets:
            experiment = FusionDetectionExperiment(dataset)
            experiments[direction].append(experiment)
            experiment.add_fusion(fusions[dataset])
            experiment.annotate_genes(genes)

    def all_pairings():
        # Yield every ordered pair of experiments with direction + strand index.
        for ad_direction_1 in experiments:
            for strand_1, first in enumerate(experiments[ad_direction_1]):
                for ad_direction_2 in experiments:
                    for strand_2, second in enumerate(experiments[ad_direction_2]):
                        yield ad_direction_1, strand_1, first, ad_direction_2, strand_2, second

    def overlay(first, second, args):
        # Overlay exactly two experiments in a fresh OverlapComplex.
        overlapping_complex = OverlapComplex()
        overlapping_complex.add_experiment(first)
        overlapping_complex.add_experiment(second)
        return overlapping_complex.overlay_fusions(True, False, args)

    # No strict settings - everything should match with everything.
    # The merged acceptor/donor direction is only kept when both agree.
    args = CLI(['-f', 'summary', '--no-strand-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        self.assertEqual(len(overlap[0]), 1)
        if ad_1 == ad_2:
            self.assertNotEqual(overlap[0][0].acceptor_donor_direction, None)
        else:
            self.assertEqual(overlap[0][0].acceptor_donor_direction, None)

    # Strand specific only - a match requires the same strand index.
    args = CLI(['-f', 'summary', '--strand-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if strand_1 == strand_2:
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)

    # Acceptor/donor order specific only - a match requires the same order.
    args = CLI(['-f', 'summary', '--no-strand-specific-matching', '--acceptor-donor-order-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if ad_1 == ad_2:
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)

    # Both strict settings - a match requires same strand index AND same order.
    args = CLI(['-f', 'summary', '--strand-specific-matching', '--acceptor-donor-order-specific-matching', '-s', ''])
    for ad_1, strand_1, first, ad_2, strand_2, second in all_pairings():
        overlap = overlay(first, second, args)
        if (strand_1 == strand_2) and (ad_1 == ad_2):
            self.assertEqual(len(overlap[0]), 1)
        else:
            self.assertEqual(len(overlap[0]), 0)