def test_check_cluster_predictions(self):
    """Compare check_cluster_predictions output against a hand-built prediction.

    The input prediction carries only genes and motifs; the expected one
    additionally has promoter names and gene/promoter counts set.
    """
    def fresh_prediction():
        # identical gene1 -- gene4 prediction, used for both input and expectation
        return ClusterPrediction(
            ClusterMarker("gene1", Motif(3, 3, score=1)),
            ClusterMarker("gene4", Motif(3, 3, score=1)))

    record = create_fake_record()
    known_promoters = [
        Promoter("gene1", 1, 5),
        Promoter("gene2", 6, 10),
        CombinedPromoter("gene3", "gene4", 11, 15),
    ]
    # see captured logging
    skipped_genes = [Gene(FeatureLocation(1, 5), locus_tag="gene5")]

    predictions = [fresh_prediction()]

    wanted = fresh_prediction()
    wanted.start.promoter = "gene1"
    wanted.end.promoter = "gene3+gene4"
    wanted.genes = 4
    wanted.promoters = 3

    checked = check_cluster_predictions(predictions, record,
                                        known_promoters, skipped_genes)
    assert checked == [wanted]
def test_cleanup_outdir(self):
    """Check which meme/fimo output directories survive cassis.cleanup_outdir.

    Only anchor "gene1" has a prediction, so the test expects the
    gene1/+03_-03 directories to be kept, gene1/+04_-04 to be removed and
    the whole gene4 directories to be removed, for both tools.
    """
    anchor_genes = ["gene1", "gene4"]
    cluster = cassis.ClusterPrediction(
        ClusterMarker("gene1", Motif(3, 3, score=1)),
        ClusterMarker("gene4", Motif(3, 3, score=1)))
    cluster.start.promoter = "gene1"
    cluster.end.promoter = "gene3+gene4"
    cluster.genes = 4
    cluster.promoters = 3
    cluster_predictions = {"gene1": [cluster]}

    out_dir = self.options.output_dir
    tools = ("meme", "fimo")

    # create some empty test dirs, most of which should be deleted during the test
    test_dirs = (
        # prediction! --> keep!
        # (presumably "+03_-03" is the directory name of Motif(3, 3))
        ("gene1", "+03_-03"),
        # prediction for this anchor, but not from this motif --> delete
        ("gene1", "+04_-04"),
        # no prediction for this anchor --> delete
        ("gene4", "+03_-03"),
        # no prediction for this anchor --> delete
        ("gene4", "+04_-04"),
    )
    for anchor, motif_dir in test_dirs:
        for tool in tools:
            os.makedirs(os.path.join(out_dir, tool, anchor, motif_dir))

    cassis.cleanup_outdir(anchor_genes, cluster_predictions, self.options)

    # assert kept directories
    # assertIn/assertNotIn report the actual listing on failure
    for tool in tools:
        self.assertIn("gene1", os.listdir(os.path.join(out_dir, tool)))
    for tool in tools:
        self.assertIn("+03_-03",
                      os.listdir(os.path.join(out_dir, tool, "gene1")))

    # assert deleted directories
    for tool in tools:
        self.assertNotIn("gene4", os.listdir(os.path.join(out_dir, tool)))
    for tool in tools:
        self.assertNotIn("+04_-04",
                         os.listdir(os.path.join(out_dir, tool, "gene1")))
def test_regeneration(self):
    """Round-trip CassisResults through to_json and regenerate_previous_results.

    The regenerated results must be a CassisResults instance with the same
    subregion locations, the same biopython qualifiers and equal promoters.
    """
    record = create_fake_record()
    original = cassis.CassisResults(record.id)

    # create a prediction, since it will generate a border with many extra qualifiers
    left = ClusterMarker("gene1", Motif(3, 3, score=1))
    left.promoter = "gene1"
    left.abundance = 2
    right = ClusterMarker("gene4", Motif(3, 3, score=1))
    right.promoter = "gene3+gene4"
    # abundance was never set on this marker; it is expected to report 1
    assert right.abundance == 1
    prediction = cassis.ClusterPrediction(left, right)

    original.subregions = cassis.create_subregions("gene1", [prediction],
                                                   record)
    assert original.subregions

    sequence = "cgtacgtacgt"
    original.promoters = [
        Promoter("gene1", 10, 20, seq=Seq(sequence)),
        Promoter("gene2", 30, 40, seq=Seq(sequence)),
        CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(sequence)),
    ]

    serialised = original.to_json()
    restored = cassis.regenerate_previous_results(serialised, record, None)
    assert isinstance(restored, cassis.CassisResults)

    assert len(original.subregions) == len(restored.subregions)
    for before, after in zip(original.subregions, restored.subregions):
        assert before.location == after.location
        qualifiers_before = before.to_biopython()[0].qualifiers
        qualifiers_after = after.to_biopython()[0].qualifiers
        assert qualifiers_before == qualifiers_after

    assert restored.promoters == original.promoters
def test_store_subregions(self):
    """Create subregions from two predictions and verify them on a record.

    Checks that create_subregions alone leaves the feature count unchanged,
    that adding the subregions raises it by their number, and that each
    stored subregion carries the qualifiers of its prediction.
    """
    # this test is similar to test_store_promoters
    anchor = "gene3"

    # first prediction: gene1 -- gene4
    narrow_left = ClusterMarker("gene1", Motif(3, 3, score=1))
    narrow_left.promoter = "gene1"
    narrow_left.abundance = 2
    narrow_right = ClusterMarker("gene4", Motif(3, 3, score=1))
    narrow_right.promoter = "gene3+gene4"
    assert narrow_right.abundance == 1
    first_cluster = cassis.ClusterPrediction(narrow_left, narrow_right)
    first_cluster.promoters = 3
    first_cluster.genes = 4

    # second prediction: gene1 -- gene5
    wide_left = ClusterMarker("gene1", Motif(4, 4, score=1))
    wide_left.promoter = "gene1"
    assert wide_left.abundance == 1
    wide_right = ClusterMarker("gene5", Motif(4, 4, score=1))
    wide_right.promoter = "gene5"
    assert wide_right.abundance == 1
    second_cluster = cassis.ClusterPrediction(wide_left, wide_right)
    second_cluster.promoters = 3
    second_cluster.genes = 4

    # order reversed because subregions are ordered by length when starts are the same
    region_predictions = [second_cluster, first_cluster]

    record_with_subregions = create_fake_record()
    # just the same, without adding subregions
    record_without_subregions = create_fake_record()

    subregions = cassis.create_subregions(anchor, region_predictions,
                                          record_with_subregions)
    # creating the subregions must not have added anything yet
    count_after_create = record_with_subregions.get_feature_count()
    assert count_after_create == record_without_subregions.get_feature_count()

    for new_region in subregions:
        record_with_subregions.add_subregion(new_region)

    # test subregion features
    baseline = record_without_subregions.get_feature_count()
    expected_count = baseline + len(subregions)
    assert record_with_subregions.get_feature_count() == expected_count

    for index, prediction in enumerate(region_predictions):
        stored = record_with_subregions.get_subregions()[index]
        self.assertEqual(stored.type, "subregion")
        self.assertEqual(stored.tool, "cassis")
        self.assertEqual(stored.anchor, anchor)
        expected_qualifiers = (
            ("genes", prediction.genes),
            ("promoters", prediction.promoters),
            ("gene_left", prediction.start.gene),
            ("gene_right", prediction.end.gene),
        )
        for qualifier, value in expected_qualifiers:
            self.assertEqual(stored.get_qualifier(qualifier), (value, ))
def test_store_clusters(self):
    """Create cluster borders from two predictions and verify them on a record.

    Checks that create_cluster_borders alone leaves the feature count
    unchanged, that adding the borders raises it by the number of clusters,
    and that each stored border carries the qualifiers of its prediction.
    """
    def build_marker(gene, motif, promoter):
        # marker with its promoter name filled in; abundance is left untouched
        marker = ClusterMarker(gene, motif)
        marker.promoter = promoter
        return marker

    # this test is similar to test_store_promoters
    anchor = "gene3"

    # first prediction: gene1 -- gene4
    left = build_marker("gene1", Motif(3, 3, score=1), "gene1")
    left.abundance = 2
    right = build_marker("gene4", Motif(3, 3, score=1), "gene3+gene4")
    assert right.abundance == 1
    first_cluster = cassis.ClusterPrediction(left, right)
    first_cluster.promoters = 3
    first_cluster.genes = 4

    # second prediction: gene1 -- gene5
    left = build_marker("gene1", Motif(4, 4, score=1), "gene1")
    assert left.abundance == 1
    right = build_marker("gene5", Motif(4, 4, score=1), "gene5")
    assert right.abundance == 1
    second_cluster = cassis.ClusterPrediction(left, right)
    second_cluster.promoters = 3
    second_cluster.genes = 4

    clusters = [first_cluster, second_cluster]

    record_with_clusters = create_fake_record()
    # just the same, without adding clusters
    record_without_clusters = create_fake_record()

    borders = cassis.create_cluster_borders(anchor, clusters,
                                            record_with_clusters)
    # creating the borders must not have added anything yet
    count_after_create = record_with_clusters.get_feature_count()
    assert count_after_create == record_without_clusters.get_feature_count()

    for new_border in borders:
        record_with_clusters.add_cluster_border(new_border)

    # test if store_clusters changed any non-cluster feature (should not!)
    # TODO

    # test cluster features
    expected_count = record_without_clusters.get_feature_count() + len(
        clusters)
    assert expected_count == record_with_clusters.get_feature_count()

    for index, prediction in enumerate(clusters):
        stored = record_with_clusters.get_cluster_borders()[index]
        self.assertEqual(stored.type, "cluster_border")
        self.assertEqual(stored.tool, "cassis")
        expected_qualifiers = (
            ("anchor", anchor),
            ("genes", prediction.genes),
            ("promoters", prediction.promoters),
            ("gene_left", prediction.start.gene),
            ("gene_right", prediction.end.gene),
        )
        for qualifier, value in expected_qualifiers:
            self.assertEqual(stored.get_qualifier(qualifier), (value, ))
def test_sort_by_abundance(self):
    """Check the order of predictions that create_predictions builds from islands.

    Expected order: highest combined border abundance first; for equal
    abundance, the lower combined motif score first.
    """
    def marker(gene, motif, abundance):
        # cluster marker with an explicit abundance
        built = ClusterMarker(gene, motif)
        built.abundance = abundance
        return built

    islands = [
        # island 1: [gene1 -- gene2]
        Island(Promoter("gene1", 1, 1), Promoter("gene2", 2, 2),
               Motif(0, 3, score=3,
                     hits=dict.fromkeys(("gene1", "gene2"), 1))),
        # island 2: [gene2 -- gene5]
        Island(Promoter("gene2", 2, 2), Promoter("gene5", 5, 5),
               Motif(3, 0, score=2,
                     hits=dict.fromkeys(
                         ("gene2", "gene3", "gene4", "gene5"), 1))),
        # island 3: [gene1 -- gene5]
        Island(Promoter("gene1", 1, 1), Promoter("gene5", 5, 5),
               Motif(3, 3, score=1,
                     hits=dict.fromkeys(
                         ("gene1", "gene2", "gene3", "gene4", "gene5"), 1))),
    ]

    # left border: 2x gene1, 1x gene2
    # right border: 2x gene5, 1x gene2

    # abundance: as high as possible
    # score: as low as possible
    expected_clusters = [
        # cluster 1: [gene1 -- gene5] --> abundance 2+2 (most abundant)
        ClusterPrediction(marker("gene1", Motif(3, 3, score=1), 2),
                          marker("gene5", Motif(3, 3, score=1), 2)),
        # cluster 3: [gene2 -- gene5] --> abundance 1+2, score 2+1 (better/lower)
        ClusterPrediction(marker("gene2", Motif(3, 0, score=2), 1),
                          marker("gene5", Motif(3, 3, score=1), 2)),
        # cluster 2: [gene1 -- gene2] --> abundance 2+1, score 1+3 (worse, higher)
        ClusterPrediction(marker("gene1", Motif(3, 3, score=1), 2),
                          marker("gene2", Motif(0, 3, score=3), 1)),
        # cluster 4: [gene2 -- gene2] --> abundance 1+1
        ClusterPrediction(marker("gene2", Motif(3, 0, score=2), 1),
                          marker("gene2", Motif(0, 3, score=3), 1)),
    ]

    self.assertEqual(create_predictions(islands), expected_clusters)