def test_gorilla(self):
    """A gorilla gene fetched by stable ID starts with the expected bases."""
    self.gorilla = Genome(
        species="gorilla",
        release=ENSEMBL_RELEASE,
        account=account,
    )
    stable_id = "ENSGGOG00000005730"
    gorilla_gene = self.gorilla.get_gene_by_stableid(stable_id)
    # only the first ten bases of the gene sequence are compared
    leading_bases = str(gorilla_gene.seq[:10])
    self.assertEqual(leading_bases, "TCCATGCGTG")
def test_no_assembly(self):
    """A region without any assembly yields a sequence made only of N's."""
    start = 24385
    end = start + 100
    # this test pins its own release rather than using ENSEMBL_RELEASE
    krat = Genome("Kangaroo rat", release=85)
    region = krat.get_region(
        coord_name="scaffold_13754",
        start=start,
        end=end,
    )
    expected_seq = "N" * (end - start)
    self.assertEqual(str(region.seq), expected_seq)
class GenomeTestBase(TestCase):
    """Base test case holding Genome instances as class attributes.

    The genomes (and the human BRCA2 gene) are created once, when the class
    body is executed, and are then available to subclasses as ``self.human``,
    ``self.mouse``, ``self.rat``, ``self.macaq``, ``self.gorilla`` and
    ``self.brca2``.
    """

    # The generator is consumed left to right by the unpacking, so genomes
    # are constructed in the order the species names are listed.
    human, mouse, rat, macaq, gorilla = (
        Genome(species=species_name, release=ENSEMBL_RELEASE, account=account)
        for species_name in ("human", "mouse", "rat", "macaque", "gorilla")
    )
    brca2 = human.get_gene_by_stableid(stableid="ENSG00000139618")
class TestFeatureCoordLevels(TestCase):
    """Tests of feature coordinate levels using the chicken genome."""

    def setUp(self):
        self.chicken = Genome(
            species="chicken", release=ENSEMBL_RELEASE, account=account
        )

    @staticmethod
    def _location_triple(feature):
        """Return (coord_name as str, start, end) from feature.location."""
        location = feature.location
        return (str(location.coord_name), location.start, location.end)

    def test_feature_levels(self):
        """repeat and cpg features report the expected coordinate levels"""
        make_levels = FeatureCoordLevels("chicken")
        levels = make_levels(
            feature_types=["gene", "cpg", "est"],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb,
        )
        self.assertEqual(levels["repeat"].levels, ["chromosome", "scaffold"])
        self.assertEqual(set(levels["cpg"].levels), {"chromosome", "scaffold"})

    def test_repeat(self):
        """repeat features in a chicken region have the expected locations"""
        # The chicken genome is used because it needs coordinate conversion.
        # These chicken coordinates correspond to the RefSeq human IL2A region.
        region = self.chicken.get_region(coord_name=9, start=21729000, end=21730141)
        # repeat is recorded at contig level, strand is 0
        repeats = region.get_features(feature_types="repeat")
        expected = {
            ("9", 21729713, 21729732),
            ("9", 21729956, 21729968),
            ("9", 21730047, 21730076),
        }
        observed = {self._location_triple(repeat) for repeat in repeats}
        self.assertEqual(observed, expected)

    def test_cpg(self):
        """cpg features are found at chromosome and scaffold levels"""
        # contains 3 CpG islands recorded at chromosome level
        chrom_cpgs = self.chicken.get_features(
            feature_types="cpg", coord_name=26, start=105184, end=184346
        )
        expected = {
            ("26", 116624, 117610),
            ("26", 138598, 139523),
            ("26", 183375, 184708),
        }
        observed = {self._location_triple(cpg) for cpg in chrom_cpgs}
        self.assertEqual(observed, expected)

        # cpg features recorded at scaffold level
        scaffold_cpgs = self.chicken.get_features(
            feature_types="cpg", coord_name="KQ759405.1", start=1, end=212434
        )
        num_scaffold_cpgs = sum(1 for _ in scaffold_cpgs)
        self.assertEqual(num_scaffold_cpgs, 18)
def setUp(self):
    """Create the chicken Genome and store it on the test instance."""
    genome_kwargs = dict(
        species="chicken",
        release=ENSEMBL_RELEASE,
        account=account,
    )
    self.chicken = Genome(**genome_kwargs)
def test_genome_comparison(self):
    """A newly built human Genome compares equal to the shared one.

    Per the original test description: different genome instances with the
    same CoreDb connection are equal.
    """
    second_human = Genome(
        species="human",
        release=ENSEMBL_RELEASE,
        account=account,
    )
    self.assertEqual(self.human, second_human)
class TestGenome(GenomeTestBase):
    """Tests of Genome queries, using the genomes defined on GenomeTestBase."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        matches = self.human.get_est_matching(stableid="ENSESTG00000000010")
        direct = list(matches)[0]
        ests = self.human.get_features(
            feature_types="est", coord_name=6, start=99994000, end=100076519
        )
        # a distinct loop name avoids shadowing the query result above
        stable_ids = [feature.stableid for feature in ests]
        self.assertIn(direct.stableid, stable_ids)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(species="human", release=ENSEMBL_RELEASE, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.make_location(coord_name=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.get_region(
            coord_name=chrom, start=start, end=end, ensembl_coord=True
        )
        # with ensembl_coord=True the resulting location start is one less
        # than the start passed in
        self.assertEqual(region.location.start, start - 1)
        self.assertEqual(region.location.end, end)
        self.assertEqual(region.location.coord_name, str(chrom))
        self.assertEqual(region.location.coord_type, "chromosome")
        self.assertEqual(region.seq, "ACCTCAGTAATCCGAAAAGCC")

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly exception"""
        region = self.human.get_region(
            coord_name="Y",
            start=57211873,
            end=57211894,
            strand=1,
            ensembl_coord=True,
        )
        self.assertEqual(str(region.seq), "CGAGGACGACTGGGAATCCTAG")

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # this test pins its own release rather than using ENSEMBL_RELEASE
        krat = Genome("Kangaroo rat", release=85)
        start = 24385
        end = start + 100
        region = krat.get_region(coord_name="scaffold_13754", start=start, end=end)
        self.assertEqual(str(region.seq), "N" * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.location.resized(-100, 100)
        region = self.human.get_region(region=new_loc)
        annot_seq = region.get_annotated_seq(feature_types="gene")
        gene_annots = annot_seq.get_annotations_matching("gene")
        got_symbols = {a.name for a in gene_annots}
        # assertIn reports the missing symbol and the set on failure
        self.assertIn(self.brca2.symbol, got_symbols)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertIsNotNone(self.human._feature_type_ids.CpGisland)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand("-"), -1)
        self.assertEqual(convert_strand("+"), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # smoke test: passes as long as construction does not raise
        Genome(
            species="dog",
            release=ENSEMBL_RELEASE,
            account=account,
            pool_recycle=1000,
        )

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        # a local is used so the class-level ``gorilla`` fixture is not
        # shadowed by an instance attribute
        gorilla = Genome(species="gorilla", release=ENSEMBL_RELEASE, account=account)
        gene = gorilla.get_gene_by_stableid("ENSGGOG00000005730")
        self.assertEqual(str(gene.seq[:10]), "TCCATGCGTG")

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        gene = self.gorilla.get_gene_by_stableid("ENSGGOG00000001953")
        cds = gene.canonical_transcript.cds
        self.assertEqual(
            str(cds),
            "ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATGGAGGTGGAGCAGCTGAAGAAA"
            "GAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAATCAAAGAGTACGTG"
            "GAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATCCCTTC"
            "AAGGAGAAAGGACCCACATTTAACGCCTTACTTCTTTTGCTGGGAAGAGCTTCTTGGTTG"
            "GAGCTAACCCGGTCTAGGACACCATAG",
        )

    def test_get_distinct_biotype(self):
        """Genome instance get_distinct for biotype should work on all genomes"""
        # smoke test: the result is not inspected, the call must simply
        # complete for every genome
        for genome in self.gorilla, self.human, self.mouse, self.rat, self.macaq:
            genome.get_distinct("biotype")
def test_pool_connection(self):
    """exercising ability to specify pool connection"""
    # Smoke test: passes as long as constructing a Genome with the
    # pool_recycle argument does not raise. The instance is not needed
    # afterwards, so it is not bound to a name.
    Genome(
        species="dog",
        release=ENSEMBL_RELEASE,
        account=account,
        pool_recycle=1000,
    )
__version__ = "3.0a1"
__maintainer__ = "Gavin Huttley"
__email__ = "*****@*****.**"
__status__ = "alpha"

# The ENSEMBL_ACCOUNT environment variable, when set, is split on whitespace
# into "host username password [port]". Otherwise the account comes from
# get_ensembl_account.
if "ENSEMBL_ACCOUNT" in os.environ:
    args = os.environ["ENSEMBL_ACCOUNT"].split()
    host, username, password = args[:3]
    # a fourth field, when present, is the port number
    kwargs = {"port": int(args[3])} if len(args) > 3 else {}
    account = HostAccount(host, username, password, **kwargs)
else:
    account = get_ensembl_account(release=ENSEMBL_RELEASE)

# module-level genomes; ``human`` is used by TestLocation.test_init
human = Genome(species="human", release=ENSEMBL_RELEASE, account=account)
platypus = Genome(species="platypus", release=ENSEMBL_RELEASE, account=account)


class TestLocation(TestCase):
    """Tests of Coordinate construction."""

    def test_init(self):
        """check the attributes of a human Coordinate on chromosome x"""
        loc = Coordinate(
            coord_name="x",
            start=1000,
            end=10000,
            strand=-1,
            genome=human,
        )
        # TODO: complete test for platypus
        self.assertEqual(loc.coord_type, "chromosome")
        self.assertEqual(loc.coord_name, "x")
        self.assertEqual(loc.start, 1000)
        self.assertEqual(loc.end, 10000)