Пример #1
0
 def test_gorilla(self):
     """A gorilla gene fetched by stable id starts with the expected bases."""
     # Build a gorilla genome for this test and keep it on the instance.
     self.gorilla = Genome(
         species="gorilla", release=ENSEMBL_RELEASE, account=account
     )
     gorilla_gene = self.gorilla.get_gene_by_stableid("ENSGGOG00000005730")
     leading_bases = str(gorilla_gene.seq[:10])
     self.assertEqual(leading_bases, "TCCATGCGTG")
Пример #2
0
 def test_no_assembly(self):
     """A region with no assembly is returned as a run of N's."""
     span = 100
     start = 24385
     end = start + span
     krat = Genome("Kangaroo rat", release=85)
     region = krat.get_region(coord_name="scaffold_13754", start=start, end=end)
     # One "N" is expected per position between start and end.
     expected = "N" * span
     self.assertEqual(str(region.seq), expected)
Пример #3
0
class GenomeTestBase(TestCase):
    """Base test case holding Genome fixtures as class attributes.

    The attributes are created when the class body executes and are
    reachable from test methods through ``self``.
    """

    # One Genome per species, all built with the same release and account.
    human = Genome(species="human", release=ENSEMBL_RELEASE, account=account)
    mouse = Genome(species="mouse", release=ENSEMBL_RELEASE, account=account)
    rat = Genome(species="rat", release=ENSEMBL_RELEASE, account=account)
    macaq = Genome(species="macaque", release=ENSEMBL_RELEASE, account=account)
    gorilla = Genome(species="gorilla",
                     release=ENSEMBL_RELEASE,
                     account=account)
    # Gene looked up by stable id on the ``human`` genome defined above
    # (the attribute name suggests BRCA2 -- TODO confirm).
    brca2 = human.get_gene_by_stableid(stableid="ENSG00000139618")
Пример #4
0
class TestFeatureCoordLevels(TestCase):
    """Coordinate-level and feature-retrieval checks on the chicken genome."""

    def setUp(self):
        """Build the chicken genome that each test queries."""
        self.chicken = Genome(
            species="chicken", release=ENSEMBL_RELEASE, account=account
        )

    def test_feature_levels(self):
        """Coordinate levels reported for the repeat and cpg feature types."""
        make_levels = FeatureCoordLevels("chicken")
        levels = make_levels(
            feature_types=["gene", "cpg", "est"],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb,
        )
        expected = ["chromosome", "scaffold"]
        # Repeat levels are compared in order, cpg levels as a set.
        self.assertEqual(levels["repeat"].levels, expected)
        self.assertEqual(set(levels["cpg"].levels), set(expected))

    def test_repeat(self):
        """Repeat features in a chicken chromosome 9 window have known spans."""
        # The chicken genome is used because it needs a coordinate conversion;
        # these chicken coordinates correspond to the RefSeq human IL2A region.
        window = {"coord_name": 9, "start": 21729000, "end": 21730141}
        region = self.chicken.get_region(**window)
        # Repeats are recorded at contig level, strand is 0.
        found = region.get_features(feature_types="repeat")
        expected = {
            ("9", 21729713, 21729732),
            ("9", 21729956, 21729968),
            ("9", 21730047, 21730076),
        }
        locations = (feature.location for feature in found)
        observed = {(str(loc.coord_name), loc.start, loc.end) for loc in locations}
        self.assertEqual(observed, expected)

    def test_cpg(self):
        """CpG features are retrieved at chromosome level and scaffold level."""
        # This window contains 3 CpG islands recorded at chromosome level.
        chrom_cpgs = self.chicken.get_features(
            feature_types="cpg", coord_name=26, start=105184, end=184346
        )
        expected = {
            ("26", 116624, 117610),
            ("26", 138598, 139523),
            ("26", 183375, 184708),
        }
        locations = (feature.location for feature in chrom_cpgs)
        observed = {(str(loc.coord_name), loc.start, loc.end) for loc in locations}
        self.assertEqual(observed, expected)

        # CpG features recorded at scaffold level.
        scaffold_cpgs = self.chicken.get_features(
            feature_types="cpg", coord_name="KQ759405.1", start=1, end=212434
        )
        self.assertEqual(len(list(scaffold_cpgs)), 18)
Пример #5
0
 def setUp(self):
     """Create the chicken genome and store it on the test instance."""
     chicken_args = dict(species="chicken", release=ENSEMBL_RELEASE, account=account)
     self.chicken = Genome(**chicken_args)
Пример #6
0
 def test_genome_comparison(self):
     """Separate Genome instances with the same CoreDb connection are equal."""
     other_human = Genome(
         species="human", release=ENSEMBL_RELEASE, account=account
     )
     self.assertEqual(self.human, other_human)
Пример #7
0
class TestGenome(GenomeTestBase):
    """Tests of Genome queries, using the fixtures from GenomeTestBase."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        matches = self.human.get_est_matching(stableid="ENSESTG00000000010")
        direct = list(matches)[0]
        ests = self.human.get_features(feature_types="est",
                                       coord_name=6,
                                       start=99994000,
                                       end=100076519)
        stable_ids = [feature.stableid for feature in ests]
        self.assertIn(direct.stableid, stable_ids)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(species="human", release=ENSEMBL_RELEASE, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.make_location(coord_name=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.get_region(coord_name=chrom,
                                       start=start,
                                       end=end,
                                       ensembl_coord=True)
        # With ensembl_coord=True the stored start is one less than the
        # value passed in, while the end is unchanged.
        self.assertEqual(region.location.start, start - 1)
        self.assertEqual(region.location.end, end)
        self.assertEqual(region.location.coord_name, str(chrom))
        self.assertEqual(region.location.coord_type, "chromosome")
        # Compare as a plain string, like the other sequence assertions here.
        self.assertEqual(str(region.seq), "ACCTCAGTAATCCGAAAAGCC")

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        region = self.human.get_region(coord_name="Y",
                                       start=57211873,
                                       end=57211894,
                                       strand=1,
                                       ensembl_coord=True)

        self.assertEqual(str(region.seq), "CGAGGACGACTGGGAATCCTAG")

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        krat = Genome("Kangaroo rat", release=85)
        start = 24385
        end = start + 100
        region = krat.get_region(coord_name="scaffold_13754",
                                 start=start,
                                 end=end)
        self.assertEqual(str(region.seq), "N" * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.location.resized(-100, 100)
        region = self.human.get_region(region=new_loc)
        annot_seq = region.get_annotated_seq(feature_types="gene")
        gene_annots = annot_seq.get_annotations_matching("gene")
        got_symbols = {a.name for a in gene_annots}
        # assertIn reports both the symbol and the set on failure.
        self.assertIn(self.brca2.symbol, got_symbols)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertIsNotNone(self.human._feature_type_ids.CpGisland)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand("-"), -1)
        self.assertEqual(convert_strand("+"), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection

        Smoke test: it passes as long as constructing a Genome with the
        ``pool_recycle`` keyword does not raise.
        """
        Genome(species="dog",
               release=ENSEMBL_RELEASE,
               account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # Uses the gorilla genome inherited from GenomeTestBase rather than
        # building an identical one and overwriting the attribute.
        gene = self.gorilla.get_gene_by_stableid("ENSGGOG00000005730")
        self.assertEqual(str(gene.seq[:10]), "TCCATGCGTG")

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        gene = self.gorilla.get_gene_by_stableid("ENSGGOG00000001953")
        cds = gene.canonical_transcript.cds
        self.assertEqual(
            str(cds),
            "ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATGGAGGTGGAGCAGCTGAAGAAA"
            "GAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAATCAAAGAGTACGTG"
            "GAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATCCCTTC"
            "AAGGAGAAAGGACCCACATTTAACGCCTTACTTCTTTTGCTGGGAAGAGCTTCTTGGTTG"
            "GAGCTAACCCGGTCTAGGACACCATAG",
        )

    def test_get_distinct_biotype(self):
        """Genome instance get_distinct for biotype should work on all genomes

        Smoke test: it passes as long as the call does not raise for any of
        the genomes; the returned values are not inspected.
        """
        for genome in (self.gorilla, self.human, self.mouse, self.rat, self.macaq):
            genome.get_distinct("biotype")
Пример #8
0
 def test_pool_connection(self):
     """exercising ability to specify pool connection

     Smoke test: it passes as long as constructing a Genome with the
     ``pool_recycle`` keyword does not raise. The instance is not used
     afterwards, so it is not bound to a name.
     """
     Genome(species="dog",
            release=ENSEMBL_RELEASE,
            account=account,
            pool_recycle=1000)
Пример #9
0
# Module metadata.
__version__ = "3.0a1"
__maintainer__ = "Gavin Huttley"
__email__ = "*****@*****.**"
__status__ = "alpha"

# Choose the Ensembl account. When the ENSEMBL_ACCOUNT environment variable is
# set, it is split on whitespace into "host username password [port]";
# otherwise the account comes from get_ensembl_account for this release.
if "ENSEMBL_ACCOUNT" not in os.environ:
    account = get_ensembl_account(release=ENSEMBL_RELEASE)
else:
    args = os.environ["ENSEMBL_ACCOUNT"].split()
    host, username, password = args[:3]
    # An optional fourth field is passed on as the integer port.
    kwargs = {"port": int(args[3])} if len(args) > 3 else {}
    account = HostAccount(host, username, password, **kwargs)

# Module-level genomes built with the chosen account.
human = Genome(species="human", release=ENSEMBL_RELEASE, account=account)
platypus = Genome(species="platypus", release=ENSEMBL_RELEASE, account=account)


class TestLocation(TestCase):
    """Tests of Coordinate objects built against the module-level genomes."""

    def test_init(self):
        """A Coordinate on the human genome exposes the values it was given."""
        human_loc = Coordinate(coord_name="x",
                               start=1000,
                               end=10000,
                               strand=-1,
                               genome=human)
        # TODO: complete test for platypus
        # coord_type is not passed to the constructor above; presumably
        # Coordinate derives it from the genome -- TODO confirm.
        self.assertEqual(human_loc.coord_type, "chromosome")
        self.assertEqual(human_loc.coord_name, "x")
        self.assertEqual(human_loc.start, 1000)
        self.assertEqual(human_loc.end, 10000)