示例#1
0
    def test_check_cluster_predictions(self):
        # Runs check_cluster_predictions on a single gene1..gene4 prediction
        # and compares the result with a prediction that has promoter names
        # and gene/promoter counts set.

        def fresh_prediction():
            # input and expectation start out as separate but identically
            # constructed objects
            return ClusterPrediction(
                ClusterMarker("gene1", Motif(3, 3, score=1)),
                ClusterMarker("gene4", Motif(3, 3, score=1)))

        seq_record = create_fake_record()

        promoters = [Promoter("gene1", 1, 5), Promoter("gene2", 6, 10)]
        promoters.append(CombinedPromoter("gene3", "gene4", 11, 15))

        # see captured logging
        ignored_genes = [Gene(FeatureLocation(1, 5), locus_tag="gene5")]

        predictions = [fresh_prediction()]

        wanted = fresh_prediction()
        wanted.start.promoter = "gene1"
        wanted.end.promoter = "gene3+gene4"
        wanted.genes = 4
        wanted.promoters = 3

        checked = check_cluster_predictions(predictions, seq_record,
                                            promoters, ignored_genes)
        assert checked == [wanted]
示例#2
0
    def test_cleanup_outdir(self):
        # Creates meme/fimo result directories for two anchor genes, runs
        # cassis.cleanup_outdir and checks which directories are still there.
        anchor_genes = ["gene1", "gene4"]
        cluster = cassis.ClusterPrediction(
            ClusterMarker("gene1", Motif(3, 3, score=1)),
            ClusterMarker("gene4", Motif(3, 3, score=1)))
        cluster.start.promoter = "gene1"
        cluster.end.promoter = "gene3+gene4"
        cluster.genes = 4
        cluster.promoters = 3
        # only anchor gene1 has a prediction; anchor gene4 has none at all
        cluster_predictions = {"gene1": [cluster]}

        output_dir = self.options.output_dir
        tools = ("meme", "fimo")

        # create some empty test dirs; all but gene1/+03_-03 are expected to
        # be deleted during the test
        # NOTE(review): "+03_-03" is presumably the directory name belonging
        # to Motif(3, 3) -- confirm against the motif naming in cassis
        test_dirs = [
            # prediction! --> keep!
            ("gene1", "+03_-03"),
            # prediction for this gene, but not from this motif --> delete
            ("gene1", "+04_-04"),
            # no prediction for this gene --> delete
            ("gene4", "+03_-03"),
            # no prediction for this gene --> delete
            ("gene4", "+04_-04"),
        ]
        for anchor, motif_dir in test_dirs:
            for tool in tools:
                os.makedirs(os.path.join(output_dir, tool, anchor, motif_dir))

        cassis.cleanup_outdir(anchor_genes, cluster_predictions, self.options)

        # assert kept directories
        # (assertIn/assertNotIn report the actual directory listing on failure)
        for tool in tools:
            self.assertIn("gene1", os.listdir(os.path.join(output_dir, tool)))
        for tool in tools:
            self.assertIn(
                "+03_-03",
                os.listdir(os.path.join(output_dir, tool, "gene1")))

        # assert deleted directories
        for tool in tools:
            self.assertNotIn("gene4",
                             os.listdir(os.path.join(output_dir, tool)))
        for tool in tools:
            self.assertNotIn(
                "+04_-04",
                os.listdir(os.path.join(output_dir, tool, "gene1")))
示例#3
0
    def test_regeneration(self):
        # Serialises CassisResults with to_json(), rebuilds them with
        # regenerate_previous_results() and compares subregions and promoters
        # of both objects.
        fake_record = create_fake_record()
        original = cassis.CassisResults(fake_record.id)

        # a prediction is required, because it produces a border carrying
        # many extra qualifiers
        left = ClusterMarker("gene1", Motif(3, 3, score=1))
        left.promoter = "gene1"
        left.abundance = 2
        right = ClusterMarker("gene4", Motif(3, 3, score=1))
        right.promoter = "gene3+gene4"
        assert right.abundance == 1
        prediction = cassis.ClusterPrediction(left, right)
        original.subregions = cassis.create_subregions(
            "gene1", [prediction], fake_record)
        assert original.subregions

        bases = "cgtacgtacgt"
        original.promoters = [
            Promoter("gene1", 10, 20, seq=Seq(bases)),
            Promoter("gene2", 30, 40, seq=Seq(bases)),
            CombinedPromoter("gene3", "gene4", 50, 60, seq=Seq(bases)),
        ]

        serialised = original.to_json()
        regenerated = cassis.regenerate_previous_results(
            serialised, fake_record, None)
        assert isinstance(regenerated, cassis.CassisResults)
        assert len(original.subregions) == len(regenerated.subregions)

        pairs = zip(original.subregions, regenerated.subregions)
        for before, after in pairs:
            assert before.location == after.location
            # only the first biopython feature of each subregion is compared
            before_qualifiers = before.to_biopython()[0].qualifiers
            after_qualifiers = after.to_biopython()[0].qualifiers
            assert before_qualifiers == after_qualifiers
        assert regenerated.promoters == original.promoters
示例#4
0
    def test_store_subregions(self):
        # Builds two predictions, converts them with cassis.create_subregions,
        # adds the results to a record and checks the stored subregions.
        # this test is similar to test_store_promoters
        anchor = "gene3"

        def as_prediction(start, end):
            # both predictions share the same promoter and gene counts
            prediction = cassis.ClusterPrediction(start, end)
            prediction.promoters = 3
            prediction.genes = 4
            return prediction

        first_start = ClusterMarker("gene1", Motif(3, 3, score=1))
        first_start.promoter = "gene1"
        first_start.abundance = 2
        first_end = ClusterMarker("gene4", Motif(3, 3, score=1))
        first_end.promoter = "gene3+gene4"
        assert first_end.abundance == 1
        first_cluster = as_prediction(first_start, first_end)

        second_start = ClusterMarker("gene1", Motif(4, 4, score=1))
        second_start.promoter = "gene1"
        assert second_start.abundance == 1
        second_end = ClusterMarker("gene5", Motif(4, 4, score=1))
        second_end.promoter = "gene5"
        assert second_end.abundance == 1
        second_cluster = as_prediction(second_start, second_end)

        # reversed order: subregions sharing a start are ordered by length
        region_predictions = [second_cluster, first_cluster]

        record_with_subregions = create_fake_record()
        # just the same, without adding subregions
        record_without_subregions = create_fake_record()

        subregions = cassis.create_subregions(
            anchor, region_predictions, record_with_subregions)
        # creating the subregions alone must not add features to the record
        assert (record_with_subregions.get_feature_count()
                == record_without_subregions.get_feature_count())

        for new_subregion in subregions:
            record_with_subregions.add_subregion(new_subregion)

        # test subregion features
        baseline = record_without_subregions.get_feature_count()
        expected_count = baseline + len(subregions)
        assert record_with_subregions.get_feature_count() == expected_count

        for index, prediction in enumerate(region_predictions):
            stored = record_with_subregions.get_subregions()[index]
            self.assertEqual(stored.type, "subregion")
            self.assertEqual(stored.tool, "cassis")
            self.assertEqual(stored.anchor, anchor)
            qualifier_values = (
                ("genes", prediction.genes),
                ("promoters", prediction.promoters),
                ("gene_left", prediction.start.gene),
                ("gene_right", prediction.end.gene),
            )
            for key, value in qualifier_values:
                self.assertEqual(stored.get_qualifier(key), (value, ))
示例#5
0
    def test_store_clusters(self):
        # Builds two predictions, converts them with
        # cassis.create_cluster_borders, adds the borders to a record and
        # checks the stored cluster borders.
        # this test is similar to test_store_promoters
        anchor = "gene3"

        def as_prediction(start, end):
            # both predictions share the same promoter and gene counts
            prediction = cassis.ClusterPrediction(start, end)
            prediction.promoters = 3
            prediction.genes = 4
            return prediction

        first_start = ClusterMarker("gene1", Motif(3, 3, score=1))
        first_start.promoter = "gene1"
        first_start.abundance = 2
        first_end = ClusterMarker("gene4", Motif(3, 3, score=1))
        first_end.promoter = "gene3+gene4"
        assert first_end.abundance == 1
        first_cluster = as_prediction(first_start, first_end)

        second_start = ClusterMarker("gene1", Motif(4, 4, score=1))
        second_start.promoter = "gene1"
        assert second_start.abundance == 1
        second_end = ClusterMarker("gene5", Motif(4, 4, score=1))
        second_end.promoter = "gene5"
        assert second_end.abundance == 1
        second_cluster = as_prediction(second_start, second_end)

        clusters = [first_cluster, second_cluster]

        record_with_clusters = create_fake_record()
        # just the same, without adding clusters
        record_without_clusters = create_fake_record()

        borders = cassis.create_cluster_borders(
            anchor, clusters, record_with_clusters)
        # creating the borders alone must not add features to the record
        assert (record_with_clusters.get_feature_count()
                == record_without_clusters.get_feature_count())

        for new_border in borders:
            record_with_clusters.add_cluster_border(new_border)

        # test if store_clusters changed any non-cluster feature (should not!)  # TODO

        # test cluster features
        expected_count = (record_without_clusters.get_feature_count()
                          + len(clusters))
        assert expected_count == record_with_clusters.get_feature_count()

        for index, prediction in enumerate(clusters):
            stored = record_with_clusters.get_cluster_borders()[index]
            self.assertEqual(stored.type, "cluster_border")
            self.assertEqual(stored.tool, "cassis")
            qualifier_values = (
                ("anchor", anchor),
                ("genes", prediction.genes),
                ("promoters", prediction.promoters),
                ("gene_left", prediction.start.gene),
                ("gene_right", prediction.end.gene),
            )
            for key, value in qualifier_values:
                self.assertEqual(stored.get_qualifier(key), (value, ))
示例#6
0
    def test_sort_by_abundance(self):
        # Feeds three overlapping islands to create_predictions and compares
        # the returned list with four hand-built predictions in the expected
        # order.

        def island(first_gene, first_pos, last_gene, last_pos, motif):
            # every promoter used here starts and ends at the same position
            return Island(Promoter(first_gene, first_pos, first_pos),
                          Promoter(last_gene, last_pos, last_pos),
                          motif)

        def marker(gene, first, second, score, abundance):
            # border marker with an explicitly assigned abundance
            border = ClusterMarker(gene, Motif(first, second, score=score))
            border.abundance = abundance
            return border

        islands = [
            # island 1: [gene1 -- gene2]
            island("gene1", 1, "gene2", 2,
                   Motif(0, 3, score=3, hits={"gene1": 1, "gene2": 1})),
            # island 2: [gene2 -- gene5]
            island("gene2", 2, "gene5", 5,
                   Motif(3, 0, score=2,
                         hits={"gene2": 1, "gene3": 1,
                               "gene4": 1, "gene5": 1})),
            # island 3: [gene1 -- gene5]
            island("gene1", 1, "gene5", 5,
                   Motif(3, 3, score=1,
                         hits={"gene1": 1, "gene2": 1, "gene3": 1,
                               "gene4": 1, "gene5": 1})),
        ]

        # left border: 2x gene1, 1x gene2
        # right border: 2x gene5, 1x gene2

        # abundance: as high as possible
        # score: as low as possible
        expected_clusters = [
            # cluster 1: [gene1 -- gene5] --> abundance 2+2 (most abundant)
            ClusterPrediction(marker("gene1", 3, 3, 1, 2),
                              marker("gene5", 3, 3, 1, 2)),
            # cluster 3: [gene2 -- gene5] --> abundance 1+2,
            # score 2+1 (better/lower)
            ClusterPrediction(marker("gene2", 3, 0, 2, 1),
                              marker("gene5", 3, 3, 1, 2)),
            # cluster 2: [gene1 -- gene2] --> abundance 2+1,
            # score 1+3 (worse, higher)
            ClusterPrediction(marker("gene1", 3, 3, 1, 2),
                              marker("gene2", 0, 3, 3, 1)),
            # cluster 4: [gene2 -- gene2] --> abundance 1+1
            ClusterPrediction(marker("gene2", 3, 0, 2, 1),
                              marker("gene2", 0, 3, 3, 1)),
        ]

        self.assertEqual(create_predictions(islands), expected_clusters)