Exemplo n.º 1
0
 def test_multi_cds_tracking(self):
     """Check that a multi-gene module is only linked to the relevant CDSes.

     A module is built from domains with locus tags "A" and "B", and an
     extra CDS "C" is added to the record. No CDS may reference the module
     until ``record.add_module`` is called; afterwards only "A" and "B"
     are expected to reference it.
     """
     tags = ("A", "B")
     module = create_module(
         domains=[DummyAntismashDomain(locus_tag=tag) for tag in tags],
     )
     assert module.is_multigene_module()

     record = DummyRecord()
     add_module_references_to_record(module, record)
     record.add_cds_feature(DummyCDS(locus_tag="C"))

     # before the module is added, nothing in the record should know of it
     assert not any(cds.modules for cds in record.get_cds_features())
     assert not record.get_modules()

     record.add_module(module)

     # the module must not be attached to every CDS in the record
     assert not record.get_cds_by_name("C").modules
     # but every CDS containing one of its domains must reference it
     for tag in tags:
         assert record.get_cds_by_name(tag).modules == (module, )
Exemplo n.º 2
0
def _create_dummy_record(reverse=False):
    """Build a DummyRecord holding three CDS features over a 60 bp sequence.

    Arguments:
        reverse: if True, the sequence is reverse-complemented, the CDS
                 coordinates are recomputed from the sequence end, and all
                 features are placed on strand -1 instead of strand 1

    Returns:
        the record, with CDS features tagged "orf0001" to "orf0003"
    """
    seq = Seq('GTGGAGCGGTACTAAATGTACTCCACTATCTGCTGATTGGAAACCACGGAGCGCTCTTAG',
              generic_dna)
    if reverse:
        seq = seq.reverse_complement()
    strand = -1 if reverse else 1

    rec = DummyRecord(seq=str(seq))
    length = len(seq)

    forward_coordinates = [(0, 15), (15, 36), (36, 60)]
    for number, (start, end) in enumerate(forward_coordinates, start=1):
        if reverse:
            # TODO: check this
            start, end = length - end + 3, length - start
        feature = DummyCDS(start, end, strand=strand,
                           locus_tag="orf%04d" % number)
        rec.add_cds_feature(feature)

    return rec
Exemplo n.º 3
0
class TestDistanceCalculations(unittest.TestCase):
    """Tests for ``utils.distance_to_pfam``.

    Each test starts with a record of 100000 bases containing a single
    query CDS at position 50000 that carries the profile "query_gene_prof".
    """

    def setUp(self):
        self.record = DummyRecord(seq="A" * 100000)
        self.query = self.create_cds(50000, 50000, ["query_gene_prof"])
        self.record.add_cds_feature(self.query)

    def create_cds(self, start, end, profiles, strand=1):
        """Create a DummyCDS with one sec_met domain per given profile name."""
        feature = DummyCDS(start, end, strand=strand, translation="A")
        for name in profiles:
            domain = feature.sec_met.Domain(name, 1e-5, 20.5, 2, "test")
            feature.sec_met.add_domains([domain])
        return feature

    def _distance(self, profiles):
        """Run distance_to_pfam for the query CDS with the given profiles."""
        return utils.distance_to_pfam(self.record, self.query, profiles)

    def test_empty_record(self):
        """With no CDS features in the record, -1 is expected."""
        self.record._cds_features.clear()
        assert self._distance([]) == -1

    def test_self_hit(self):
        """The query's own profile is expected at distance 0."""
        assert self._distance(["query_gene_prof"]) == 0

    def test_simple_before(self):
        """A CDS ending 20000 bases before the query."""
        self.record.add_cds_feature(
            self.create_cds(29000, 30000, profiles=["left20k"]))
        assert self._distance(["notleft20k"]) == -1
        assert self._distance(["left20k"]) == 20000

    def test_simple_after(self):
        """A CDS starting 10000 bases after the query."""
        self.record.add_cds_feature(
            self.create_cds(60000, 63000, profiles=["right10k"]))
        assert self._distance(["notright10k"]) == -1
        assert self._distance(["right10k"]) == 10000

    def test_outside_before(self):
        """A CDS ending at 9999 is expected to give -1."""
        self.record.add_cds_feature(
            self.create_cds(5000, 9999, profiles=["outside"]))
        assert self._distance(["outside"]) == -1

    def test_outside_after(self):
        """A CDS starting at 90001 is expected to give -1."""
        self.record.add_cds_feature(
            self.create_cds(90001, 95000, profiles=["outside"]))
        assert self._distance(["outside"]) == -1

    def test_edge_overlap_after(self):
        """A CDS starting at 90000 gives -1; starting at 89999 gives 39999."""
        feature = self.create_cds(90000, 91000, profiles=["r.edge"])
        self.record.add_cds_feature(feature)
        assert self._distance(["r.edge"]) == -1

        # shifting the start by one base changes the result, on either strand
        for strand in (1, -1):
            feature.location = FeatureLocation(89999, 91000, strand=strand)
            assert self._distance(["r.edge"]) == 39999

    def test_edge_overlap_before(self):
        """A CDS ending at 10000 gives -1; ending at 10001 gives 39999."""
        feature = self.create_cds(9000, 10000, profiles=["l.edge"])
        self.record.add_cds_feature(feature)
        assert self._distance(["l.edge"]) == -1

        # shifting the end by one base changes the result, on either strand
        for strand in (1, -1):
            feature.location = FeatureLocation(9000, 10001, strand=strand)
            assert self._distance(["l.edge"]) == 39999

    def test_with_no_secmet(self):
        """A CDS with a fresh, empty SecMetQualifier is expected to give -1."""
        feature = self.create_cds(55000, 60000, profiles=[])
        feature.sec_met = SecMetQualifier()
        self.record.add_cds_feature(feature)
        assert self._distance(["test"]) == -1
Exemplo n.º 4
0
class ClusterFinderTest(unittest.TestCase):
    """Tests for the clusterfinder module using subregion-style results.

    The fixture record has 2000 bases and 18 CDS features, each paired with
    one PFAM domain at the same location carrying a preset probability.
    """

    def setUp(self):
        self.config = build_config(
            ["--cf-create-clusters", "--cf-mean-threshold", "0.6",
             "--cf-min-cds", "5", "--cf-min-pfams", "5"],
            modules=[clusterfinder],
            isolated=True,
        )
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))

        # (start, end, probability, PFAM identifier) of each CDS/PFAM pair
        fixtures = [
            (10, 20, 0.1, 'PF77777'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'PF77776'),
            (500, 505, None, 'PF77775'),
            (1010, 1020, 0.1, 'PF77774'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in fixtures:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)

            pfam = DummyPFAMDomain(location=location,
                                   protein_start=start + 1,
                                   protein_end=end - 1,
                                   identifier=pfam_id)
            pfam.domain_id = "pfam_%d" % start
            pfam.probability = probability
            self.record.add_pfam_domain(pfam)

    def tearDown(self):
        destroy_config()

    def test_options(self):
        """Each of these cf_threshold values is expected to raise one issue."""
        for threshold in (-1.0, -0.01, -0.0001, 1.0001, 2.0):
            update_config({"cf_threshold": threshold})
            issues = clusterfinder.check_options(self.config)
            assert len(issues) == 1

    def test_check_prereqs(self):
        """No prerequisite problems are expected for this config."""
        problems = clusterfinder.check_prereqs(self.config)
        self.assertEqual([], problems)

    def test_find_nr_cds(self):
        """Check the returned position and CDS count for several ranges."""
        # (queried position, expected position, expected CDS count)
        cases = [
            ((0, 5), (0, 5), 0),
            ((150, 160), (150, 160), 0),
            ((35, 115), (30, 120), 7),
            ((501, 504), (500, 505), 1),
        ]
        for position, expected_position, expected_count in cases:
            newpos, count = clusterfinder.probabilistic.find_nr_cds(
                position, self.record)
            assert newpos == expected_position
            assert count == expected_count

    def test_find_probabilistic_clusters(self):
        """Two clusters are expected, at 30-120 and 1030-1120."""
        ret = clusterfinder.find_probabilistic_clusters(
            self.record, self.config)
        assert len(ret) == 2
        for index, (start, end) in enumerate([(30, 120), (1030, 1120)]):
            found = ret[index]
            assert found.location.start == start
            assert found.location.end == end

    def test_no_overlaps(self):
        """Generated areas must match the subregions stored in the record."""
        results = clusterfinder.generate_results(self.record, self.config)
        areas = self.record.get_subregions()

        assert len(results.areas) == 2
        assert len(areas) == 2
        assert list(areas) == results.areas

        for index, (start, end) in enumerate([(30, 120), (1030, 1120)]):
            area = areas[index]
            assert area.location.start == start
            assert area.location.end == end
            self.assertAlmostEqual(0.6429, area.probability, places=4)

    def test_full_loop(self):
        """Results must survive a round trip through their JSON form."""
        results = clusterfinder.run_on_record(self.record, None, self.config)
        original_count = len(results.areas)

        serialised = json.dumps(results.to_json())
        regenned = clusterfinder.regenerate_previous_results(
            json.loads(serialised), self.record, self.config)

        assert len(regenned.areas) == original_count
        for before, after in zip(results.areas, regenned.areas):
            assert before.location == after.location
            self.assertAlmostEqual(before.probability, after.probability,
                                   places=6)
Exemplo n.º 5
0
class ClusterFinderTest(unittest.TestCase):
    """Tests for the clusterfinder module using cluster-border-style results.

    The fixture record has 2000 bases and 18 CDS features, each paired with
    one PFAM domain at the same location carrying a preset probability.
    """

    def setUp(self):
        self.config = build_config(
            ["--cf-create-clusters", "--cf-mean-threshold", "0.6",
             "--cf-min-cds", "5", "--cf-min-pfams", "5"],
            modules=[clusterfinder],
            isolated=True,
        )
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))

        # (start, end, probability, PFAM identifier) of each CDS/PFAM pair
        fixtures = [
            (10, 20, 0.1, 'FAKE007'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'FAKE234'),
            (500, 505, None, 'FAKE505'),
            (1010, 1020, 0.1, 'FAKE007'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in fixtures:
            location = FeatureLocation(start, end)
            cds = CDSFeature(location, locus_tag=str(start))
            self.record.add_cds_feature(cds)

            pfam = PFAMDomain(location, "dummy_description")
            pfam.db_xref.append(pfam_id)
            pfam.probability = probability
            self.record.add_pfam_domain(pfam)

    def tearDown(self):
        destroy_config()

    def test_options(self):
        """Each of these cf_threshold values is expected to raise one issue."""
        for threshold in (-1.0, -0.01, -0.0001, 1.0001, 2.0):
            update_config({"cf_threshold": threshold})
            issues = clusterfinder.check_options(self.config)
            assert len(issues) == 1

    def test_check_prereqs(self):
        """No prerequisite problems are expected."""
        problems = clusterfinder.check_prereqs()
        self.assertEqual([], problems)

    def test_find_nr_cds(self):
        """Check the returned position and CDS count for several ranges."""
        # (queried position, expected position, expected CDS count);
        # the expected container types are kept exactly as the test
        # originally spelled them (tuple when unchanged, list otherwise)
        cases = [
            ((0, 5), (0, 5), 0),
            ((150, 160), (150, 160), 0),
            ((35, 115), [30, 120], 7),
            ((501, 504), [500, 505], 1),
        ]
        for position, expected_position, expected_count in cases:
            newpos, count = clusterfinder.probabilistic.find_nr_cds(
                position, self.record)
            assert newpos == expected_position
            assert count == expected_count

    def test_find_probabilistic_clusters(self):
        """Two clusters are expected, at 30-120 and 1030-1120."""
        ret = clusterfinder.find_probabilistic_clusters(
            self.record, self.config)
        assert len(ret) == 2
        for index, (start, end) in enumerate([(30, 120), (1030, 1120)]):
            found = ret[index]
            assert found.location.start == start
            assert found.location.end == end

    def test_no_overlaps(self):
        """Generated borders must match the borders stored in the record."""
        results = clusterfinder.generate_results(self.record, self.config)

        self.assertEqual(2, len(results.borders))
        assert list(self.record.get_cluster_borders()) == results.borders

        first, second = self.record.get_cluster_borders()
        expected_bounds = ((30, 120), (1030, 1120))
        for border, (start, end) in zip((first, second), expected_bounds):
            assert border.location.start == start
            assert border.location.end == end
            assert not border.high_priority_product
            self.assertAlmostEqual(0.6429, border.probability, places=4)

    def test_merges(self):
        """Extra borders combine with the generated ones into two clusters."""
        clusterfinder.generate_results(self.record, self.config)
        assert len(self.record.get_cluster_borders()) == 2

        for start, end in ((10, 40), (1040, 1050), (110, 400)):
            border = ClusterBorder(FeatureLocation(start, end), "testtool",
                                   product=str(start))
            self.record.add_cluster_border(border)

        # clusters only exist once they are explicitly built from borders
        assert not self.record.get_clusters()

        self.record.create_clusters_from_borders()

        assert len(self.record.get_clusters()) == 2

        first, second = self.record.get_clusters()

        assert first.location.start == 10
        assert first.location.end == 400
        assert first.products == ("10", "110")

        assert second.location.start == 1030
        assert second.location.end == 1120
        assert second.products == ("1040", )