def test_01(self):
    """
    Read two ChimeraScan BEDPE example datasets, collect the fusions of
    each dataset in its own MergedFusion container and verify both the
    container sizes and the string representation of the second one.
    """
    args = CLI([
        '-m', 'egm',
        '--no-strand-specific-matching',
        '-s', '',
        '-o', 'test_ComparisonTriangle.test_02.output.txt',
    ])

    dataset_1 = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_Functional.test_01.Example_01.bedpe", "test1")
    dataset_2 = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_Functional.test_01.Example_02.bedpe", "test2")

    self.assertEqual(len(dataset_1), 2)
    self.assertEqual(len(dataset_2), 2)

    # Parsed but not used by any assertion below; kept so the annotation
    # file is still loaded exactly as before.
    genes = ParseBED("tests/data/refseq_hg19.bed", "hg19", 200000)

    # F[a] + F[b] = MF(A, B)
    merged_1 = MergedFusion()
    for entry in dataset_1:
        merged_1.add_fusion(entry)
    self.assertEqual(len(merged_1), 2)

    # F[c] + F[d] = MF(C, D)
    merged_2 = MergedFusion()
    for entry in dataset_2:
        merged_2.add_fusion(entry)
    self.assertEqual(len(merged_2), 2)

    expected_repr = (
        "--- MergedFusion container of size 2 ---\n"
        "Fusion 't2_431' (from dataset 'test2'): chr22:15465000(-)<-chr22:41929200(+)\n"
        "\n"
        "Fusion 't2_223' (from dataset 'test2'): chr11:524500(-)<-chr11:62910000(+)\n"
        "----------------------------------------\n"
    )
    self.assertEqual(str(merged_2), expected_repr)
def test_01(self):
    """
    Read two ChimeraScan BEDPE example datasets and check that merging
    the fusions of each dataset yields a MergedFusion of size 2.
    """
    cli_arguments = [
        '-m', 'egm',
        '--no-strand-specific-matching',
        '-s', '',
        '-o', 'test_ComparisonTriangle.test_02.output.txt',
    ]
    args = CLI(cli_arguments)

    first_run = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_Functional.test_01.Example_01.bedpe",
        "test1")
    second_run = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_Functional.test_01.Example_02.bedpe",
        "test2")

    self.assertEqual(len(first_run), 2)
    self.assertEqual(len(second_run), 2)

    # Loaded as in the original test although no assertion reads it.
    genes = ParseBED("tests/data/refseq_hg19.bed", "hg19", 200000)

    # F[a] + F[b] = MF(A, B)
    merged_first = MergedFusion()
    for item in first_run:
        merged_first.add_fusion(item)
    self.assertEqual(len(merged_first), 2)

    # F[c] + F[d] = MF(C, D)
    merged_second = MergedFusion()
    for item in second_run:
        merged_second.add_fusion(item)
    self.assertEqual(len(merged_second), 2)
def test_03(self):
    """
    Duplicate removal, simplest case: two fusions with identical
    coordinates must collapse into a single fusion.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # Two fusions built from exactly the same arguments.
    identical_fusions = [
        Fusion("chr1", "chr2", 15000, 20000, "+", "+", "Experiment", "", True),
        Fusion("chr1", "chr2", 15000, 20000, "+", "+", "Experiment", "", True),
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    for candidate in identical_fusions:
        experiment.add_fusion(candidate)

    self.assertEqual(len(experiment), 2)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_03.bed",
        "hg18",
        200000)

    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)

    # Every remaining fusion must carry one gene on each side.
    for remaining in experiment:
        self.assertEqual(len(remaining.annotated_genes_left), 1)
        self.assertEqual(len(remaining.annotated_genes_right), 1)
def test_01(self):
    """
    Load the same BEDPE file as two experiments, annotate both,
    remove duplicates and compare them by spanning genes.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    data_prefix = "tests/data/test_CompareFusionsBySpanningGenes.TestCompareFusionsBySpanningGenes.test_01"
    bedpe_file = data_prefix + ".bedpe"

    experiment_a = ReadChimeraScanAbsoluteBEDPE(bedpe_file, "TestExperimentA")
    experiment_b = ReadChimeraScanAbsoluteBEDPE(bedpe_file, "TestExperimentB")

    self.assertEqual(len(experiment_a), 690)
    self.assertEqual(len(experiment_b), 690)

    genes = ParseBED(data_prefix + ".bed", "hg18", 200000)
    self.assertEqual(len(genes), 47790)

    both_experiments = (experiment_a, experiment_b)

    # Annotate both experiments first, then de-duplicate both
    # (same phase order as before).
    for experiment in both_experiments:
        experiment.annotate_genes(genes)
    for experiment in both_experiments:
        experiment.remove_duplicates(args)

    comparison = CompareFusionsBySpanningGenes(experiment_a, experiment_b, args)
    overlapping_fusions = comparison.find_overlap()

    number_of_matches = len(overlapping_fusions[0])
    self.assertLessEqual(number_of_matches, 538)
    self.assertLessEqual(538, len(experiment_a))
def test_08(self):
    """
    Duplicate removal with two fusions of which one lacks annotations:
    that one is not gene spanning and is therefore expected to be
    dropped, leaving a single fusion.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # The fusions differ only in their right break position.
    candidates = [
        Fusion("chr1", "chr2", 15000, 20000, "+", "+", "Experiment", "", True),
        Fusion("chr1", "chr2", 15000, 30000, "+", "+", "Experiment", "", True),
    ]

    experiment = FusionDetectionExperiment("Experiment_1")
    for candidate in candidates:
        experiment.add_fusion(candidate)

    self.assertEqual(len(experiment), 2)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_08.bed",
        "hg18",
        200000)

    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)

    for survivor in experiment:
        self.assertEqual(len(survivor.annotated_genes_left), 1)
        self.assertEqual(len(survivor.annotated_genes_right), 1)
def test_05(self):
    """
    Duplicate removal with two fusions whose annotations are subsets
    of each other: one fusion has two overlapping left genes and one
    right gene, the other one left gene and two right genes. The
    single gene is always a subset of the other fusion's two genes,
    so both annotations must be treated as identical.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    first = Fusion("chr1", "chr2", 15000, 20050, "+", "+", "Experiment", "", True)   # (1A):(2A,2B)
    second = Fusion("chr1", "chr2", 15050, 20000, "+", "+", "Experiment", "", True)  # (1A,1B):(2A)

    experiment = FusionDetectionExperiment("Experiment_1")
    for candidate in (first, second):
        experiment.add_fusion(candidate)

    self.assertEqual(len(experiment), 2)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_05.bed",
        "hg18",
        200000)

    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 1)

    for merged in experiment:
        self.assertEqual(len(merged.annotated_genes_left), 1)   # subset = (1A)
        self.assertEqual(len(merged.annotated_genes_right), 1)  # subset = (2A)
def test_12(self):
    """
    Eight fusions that fall into four groups of duplicates:

    {a} = 1 , 4 , 5
    {b} = 2 , 6
    {c} = 7 , 8
    {d} = 3

       | f1 | f2 | f3 | f4 | f5 | f6 | f7 | f8 |
    ---+----+----+----+----+----+----+----+----+
    f1 | *  |    |    | a  | a  |    |    |    |
    ---+----+----+----+----+----+----+----+----+
    f2 |    | *  |    |    |    | b  |    |    |
    ---+----+----+----+----+----+----+----+----+
    f3 |    |    | d  |    |    |    |    |    |
    ---+----+----+----+----+----+----+----+----+
    f4 | a  |    |    | *  | a  |    |    |    |
    ---+----+----+----+----+----+----+----+----+
    f5 | a  |    |    | a  | *  |    |    |    |
    ---+----+----+----+----+----+----+----+----+
    f6 |    | b  |    |    |    | *  |    |    |
    ---+----+----+----+----+----+----+----+----+
    f7 |    |    |    |    |    |    | *  | c  |
    ---+----+----+----+----+----+----+----+----+
    f8 |    |    |    |    |    |    | c  | *  |
    ---+----+----+----+----+----+----+----+----+

    After duplicate removal one fusion per group (4 in total) remains.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    # (chromosome, left break, right break) for f1 .. f8; both breaks of
    # a fusion lie on the same chromosome.
    coordinates = [
        ("chr1", 15010, 80040),  # f1
        ("chr2", 15030, 80030),  # f2
        ("chr4", 15050, 80070),  # f3
        ("chr1", 15060, 80010),  # f4
        ("chr1", 15020, 80050),  # f5
        ("chr2", 15080, 80080),  # f6
        ("chr3", 15040, 80020),  # f7
        ("chr3", 15070, 80060),  # f8
    ]
    fusions = [
        Fusion(chromosome, chromosome, left, right, "+", "+", "Experiment", "", True)
        for chromosome, left, right in coordinates
    ]

    experiment = FusionDetectionExperiment("Experiment_1")

    # Fusions are deliberately inserted in a shuffled order.
    for fusion_number in (2, 8, 6, 4, 5, 3, 1, 7):
        experiment.add_fusion(fusions[fusion_number - 1])

    self.assertEqual(len(experiment), 8)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_12.bed",
        "hg18",
        200000)

    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 4)
def test_01(self):
    """
    Duplicate removal on a ChimeraScan BEDPE dataset must strictly
    reduce the number of fusions in the experiment.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    experiment = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_01.bedpe",
        "TestExperiment")
    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_01.bed",
        "hg18",
        200000)

    length_before_duplication_removal = len(experiment)

    experiment.annotate_genes(genes)
    experiment.remove_duplicates(args)

    length_after_duplication_removal = len(experiment)

    # assertGreater reports both lengths on failure, whereas
    # assertTrue(a > b) only reports "False is not true".
    self.assertGreater(length_before_duplication_removal,
                       length_after_duplication_removal)
def test_11(self):
    """
    Two fusions that may not be merged:

    {1} = a,b,c
    {2} = a,b,d

    Both fusions share the right genes a and b but differ in the third
    gene (c versus d), so neither gene set is a subset of the other and
    both fusions must survive duplicate removal.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    fusion_1 = Fusion("chr1", "chr2", 15000, 27500, "+", "+", "Experiment", "", True)
    fusion_2 = Fusion("chr1", "chr2", 15000, 25500, "+", "+", "Experiment", "", True)

    experiment = FusionDetectionExperiment("Experiment_1")
    experiment.add_fusion(fusion_1)
    experiment.add_fusion(fusion_2)

    self.assertEqual(len(experiment), 2)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_11.bed",
        "hg18",
        200000)

    experiment.annotate_genes(genes)

    # Expected right-side gene names per right break position.
    # NOTE(review): the fusions above are constructed with right break
    # positions 27500 and 25500, so unless Fusion adjusts the positions
    # neither 30000 nor 25000 ever matches and these checks are skipped.
    # Confirm the intended positions against the BED fixture.
    expected_right_genes = (
        (30000, ("NM_00002A", "NM_00002B", "NM_00002C")),
        (25000, ("NM_00002A", "NM_00002B", "NM_00002D")),
    )

    for fusion in experiment:
        for break_position, expected_names in expected_right_genes:
            if fusion.get_right_break_position() == break_position:
                self.assertEqual(len(fusion.annotated_genes_right), 3)

                annotated_names = [str(gene_name) for gene_name in fusion.annotated_genes_right]
                for expected_name in expected_names:
                    # assertIn shows the missing name and the actual list
                    # on failure, unlike assertTrue(x in names).
                    self.assertIn(expected_name, annotated_names)

    experiment.remove_duplicates(args)

    self.assertEqual(len(experiment), 2)
def test_02(self):
    """
    Annotate a single fusion and check that each side gets 8 genes,
    none of which has a name containing the substring '_invalid'.
    """
    experiment = ReadChimeraScanAbsoluteBEDPE(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_02.bedpe",
        "TestExperiment")
    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_02.bed",
        "hg18",
        200000)

    self.assertEqual(len(experiment), 1)

    experiment.annotate_genes(genes)

    for fusion in experiment:
        left_genes = fusion.get_annotated_genes_left(False)
        right_genes = fusion.get_annotated_genes_right(False)

        self.assertEqual(len(left_genes), 8)
        self.assertEqual(len(right_genes), 8)

        # Ensure ALL annotated gene names do NOT contain the substring
        # '_invalid'. The previous check, min(find(...)) == -1, already
        # passed as soon as a single name lacked the substring, so an
        # invalid gene could slip through unnoticed.
        for gene_name in left_genes:
            self.assertNotIn("_invalid", str(gene_name))
        for gene_name in right_genes:
            self.assertNotIn("_invalid", str(gene_name))
def test_02(self):
    """
    Compare two experiments by spanning genes twice: once with the
    arguments in args_a (no strand flag given) and once with
    --strand-specific-matching (args_b).
    """
    args_a = CLI(['-m', 'subset', '-s', ''])
    args_b = CLI(['-m', 'subset', '--strand-specific-matching', '-s', ''])

    data_prefix = "tests/data/test_CompareFusionsBySpanningGenes.TestCompareFusionsBySpanningGenes.test_02"

    ## First, test the matches without strand-specific matching
    ## (all 4 fusions should be identical)
    experiment_a = ReadChimeraScanAbsoluteBEDPE(
        data_prefix + "_a.bedpe", "TestExperimentA")
    experiment_b = ReadChimeraScanAbsoluteBEDPE(
        data_prefix + "_b.bedpe", "TestExperimentB")

    self.assertEqual(len(experiment_a), 4)
    self.assertEqual(len(experiment_b), 4)

    genes = ParseBED(data_prefix + ".bed", "hg18", 200000)
    self.assertEqual(len(genes), 8)

    experiments = (experiment_a, experiment_b)
    for experiment in experiments:
        experiment.annotate_genes(genes)

    ## @todo -> remove duplicates should be done separately
    for experiment in experiments:
        experiment.remove_duplicates(args_a)

    # No EGM, no strand-specific-matching
    comparison = CompareFusionsBySpanningGenes(experiment_a, experiment_b, args_a)
    matches = comparison.find_overlap()
    self.assertLessEqual(len(matches[0]), 4)

    ## Second, test the matches with strand-specific matching enabled
    ## (only the first fusion should be identical)
    # No EGM, but strand-specific-matching
    comparison = CompareFusionsBySpanningGenes(experiment_a, experiment_b, args_b)
    matches = comparison.find_overlap()
    self.assertLessEqual(len(matches[0]), 1)
def test_01(self):
    """Parse the test_01 BED file and check that its `n` attribute is 88."""
    gene_annotation = ParseBED(
        "tests/data/test_ParseBED.TestParseBED.test_01.bed",
        "test",
        200000)

    expected_n = 88
    self.assertEqual(gene_annotation.n, expected_n)
def test_13(self):
    """
    Crazy stuff: three fusions with partially overlapping gene sets,
    inserted in every possible order.

    {a} = 1 , 2
    {b} = 1 , 3

       | f1 | f2 | f3 |
    ---+----+----+----+
    f1 | *  | 1  | 2  |
    ---+----+----+----+
    f2 | 1  | *  |    |
    ---+----+----+----+
    f3 | 2  |    | *  |
    ---+----+----+----+

    1 = [a,b,c]
    2 = [a,c]
    3 = [a,b]

    >> Try in all possible orders: each of the six experiments receives
    the same three fusions in a different insertion order, and each must
    end up with two fusions after duplicate removal.
    """
    args = CLI(['-m', 'subset', '--no-strand-specific-matching', '-s', ''])

    experiment_names = (
        "Experiment_1",
        "Experiment_2",
        "Experiment_3",
        "Experiment_4",
        "Experiment_5",
        "Experiment_6",
    )

    # Right break positions of fusion 1, 2 and 3; everything else is
    # shared by all fusions.
    right_break_positions = (70000, 80000, 60000)

    # Build all fusions first (fusion 1, 2, 3 for experiment 1, then for
    # experiment 2, ...), before any experiment object exists.
    fusions_per_experiment = []
    for experiment_name in experiment_names:
        fusions_per_experiment.append([
            Fusion("chr1", "chr2", 15000, right_break, "+", "+", experiment_name, "", True)
            for right_break in right_break_positions
        ])

    experiments = [FusionDetectionExperiment(experiment_name) for experiment_name in experiment_names]

    # Zero-based insertion order of (fusion 1, fusion 2, fusion 3) for
    # each experiment: all six permutations.
    insertion_orders = (
        (0, 1, 2),
        (0, 2, 1),
        (1, 0, 2),
        (1, 2, 0),
        (2, 0, 1),
        (2, 1, 0),
    )
    for experiment, fusions, order in zip(experiments, fusions_per_experiment, insertion_orders):
        for fusion_index in order:
            experiment.add_fusion(fusions[fusion_index])

    for experiment in experiments:
        self.assertEqual(len(experiment), 3)

    genes = ParseBED(
        "tests/data/test_FusionDetectionExperiment.TestFusionDetectionExperiment.test_13.bed",
        "hg18",
        200000)

    for experiment in experiments:
        experiment.annotate_genes(genes)

    for experiment in experiments:
        experiment.remove_duplicates(args)

    # Removing duplicates:
    #
    # order 1:
    #  -> [A,B]   <-> [A,C]   = [A,B] & [A,C] & [A,B,C]
    #  -> [A,B]   <-> [A,B,C] = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 2:
    #  -> [A,B]   <-> [A,B,C] = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 3:
    #  -> [A,C]   <-> [A,B]   = [A,C] & [A,B] & [A,B,C]
    #  -> [A,C]   <-> [A,B,C] = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]
    # order 4:
    #  -> [A,C]   <-> [A,B,C] = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]
    # order 5:
    #  -> [A,B,C] <-> [A,B]   = [A,B*] & [A,C]
    #  -> [A,B*]  <-> [A,C]   = [A,B*] & [A,C]
    # order 6:
    #  -> [A,B,C] <-> [A,C]   = [A,C*] & [A,B]
    #  -> [A,C*]  <-> [A,B]   = [A,C*] & [A,B]

    for experiment in experiments:
        self.assertEqual(len(experiment), 2)