def test_gorilla(self):
    """should correctly return a gorilla gene"""
    # The genome is stored on the instance under the name `gorilla`.
    self.gorilla = Genome(Species="gorilla", Release=Release,
                          account=account)
    gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
    # Only the first ten bases of the gene sequence are compared.
    # assertEqual is used in place of the deprecated assertEquals alias.
    self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')
class GenomeTestBase(TestCase):
    """Shared fixtures: genomes and one human gene built at class creation.

    Every attribute below is evaluated once, when the class body runs, and
    is then available to all subclasses as a class attribute.
    """

    human = Genome(
        Species="human", Release=Release, account=account)
    mouse = Genome(
        Species="mouse", Release=Release, account=account)
    rat = Genome(
        Species="rat", Release=Release, account=account)
    macaq = Genome(
        Species="macaque", Release=Release, account=account)
    gorilla = Genome(
        Species="gorilla", Release=Release, account=account)

    # Gene looked up by stable id on the human genome defined above.
    brca2 = human.getGeneByStableId(StableId="ENSG00000139618")
def test_no_assembly(self):
    """return N's for coordinates with no assembly"""
    # Release is hard-coded to 58 and no account is passed for this genome.
    krat = Genome('Kangaroo rat', Release=58)
    start = 24385
    end = start + 100
    region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                            End=end)
    # One 'N' is expected for every position of the requested span.
    # assertEqual is used in place of the deprecated assertEquals alias.
    self.assertEqual(str(region.Seq), 'N' * (end - start))
def test_no_assembly(self):
    """return N's for coordinates with no assembly"""
    # Release is hard-coded to 58 and no account is passed for this genome.
    krat = Genome('Kangaroo rat', Release=58)
    start = 24385
    end = start + 100
    region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                            End=end)
    # One 'N' is expected for every position of the requested span.
    # assertEqual is used in place of the deprecated assertEquals alias.
    self.assertEqual(str(region.Seq), 'N' * (end - start))
class TestFeatureCoordLevels(TestCase):
    """Feature coordinate levels and feature queries on the chicken genome."""

    def setUp(self):
        """connect to the chicken genome used by every test"""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def test_feature_levels(self):
        """should report the expected levels for repeat and cpg features"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # repeat levels are compared as an ordered list, cpg levels as a set
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['chromosome', 'scaffold'])
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['chromosome', 'scaffold']))

    def test_repeat(self):
        """should return the expected repeat features for a region"""
        # use chicken genome as it needs to do conversion
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=21727352, End=21729141)
        region = self.chicken.getRegion(**coord)
        # NOTE(review): the earlier comment here said repeats are recorded
        # at contig level with strand 0, but test_feature_levels expects
        # chromosome/scaffold levels for repeats -- confirm which is right.
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 21727499, 21727527),
                  ("9", 21728009, 21728018),
                  ("9", 21728169, 21728178)]
        locations = [repeat.Location for repeat in repeats]
        # CoordName is converted to str before comparing with `expect`
        obs = [(str(loc.CoordName), loc.Start, loc.End)
               for loc in locations]
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """should return the expected cpg features at both levels"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=105184, End=184346)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 112153, 113139),
               ("26", 134125, 135050),
               ("26", 178899, 180227)]
        locations = [cpg.Location for cpg in cpgs1]
        obs = [(str(loc.CoordName), loc.Start, loc.End)
               for loc in locations]
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at scaffold level:
        coord2 = dict(CoordName='JH376196.1', Start=1, End=14640)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 3)
class TestFeatureCoordLevels(TestCase):
    """Feature coordinate levels and feature queries on the chicken genome."""

    def setUp(self):
        """connect to the chicken genome used by every test"""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def test_feature_levels(self):
        """should report the expected levels for repeat and cpg features"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # repeat levels are compared as an ordered list, cpg levels as a set
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['contig'])
        # no backslash continuation needed inside the call parentheses
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['contig', 'supercontig', 'chromosome']))

    def test_repeat(self):
        """should return the expected repeat features for a region"""
        # use chicken genome as it needs to do conversion
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=23817146, End=23818935)
        region = self.chicken.getRegion(**coord)
        # repeat is recorded at contig level, strand is 0
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 23817293, 23817321),
                  ("9", 23817803, 23817812),
                  ("9", 23817963, 23817972)]
        locations = [repeat.Location for repeat in repeats]
        obs = [(loc.CoordName, loc.Start, loc.End) for loc in locations]
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """should return the expected cpg features at both levels"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=110000, End=190000)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 116969, 117955),
               ("26", 139769, 140694),
               ("26", 184546, 185881)]
        locations = [cpg.Location for cpg in cpgs1]
        obs = [(loc.CoordName, loc.Start, loc.End) for loc in locations]
        # observed first, expected second, matching test_repeat
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at supercontig level:
        coord2 = dict(CoordName='Un_random', Start=29434117, End=29439117)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 1)
class TestFeatureCoordLevels(TestCase):
    """Feature coordinate levels and feature queries on the chicken genome."""

    def setUp(self):
        """connect to the chicken genome used by every test"""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def test_feature_levels(self):
        """should report the expected levels for repeat and cpg features"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # repeat levels are compared as an ordered list, cpg levels as a set
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['chromosome', 'scaffold'])
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['chromosome', 'scaffold']))

    def test_repeat(self):
        """should return the expected repeat features for a region"""
        # use chicken genome as it needs to do conversion
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=21727352, End=21729141)
        region = self.chicken.getRegion(**coord)
        # NOTE(review): the earlier comment here said repeats are recorded
        # at contig level with strand 0, but test_feature_levels expects
        # chromosome/scaffold levels for repeats -- confirm which is right.
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 21727499, 21727527),
                  ("9", 21728009, 21728018),
                  ("9", 21728169, 21728178)]
        locations = [repeat.Location for repeat in repeats]
        # CoordName is converted to str before comparing with `expect`
        obs = [(str(loc.CoordName), loc.Start, loc.End)
               for loc in locations]
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """should return the expected cpg features at both levels"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=105184, End=184346)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 112153, 113139),
               ("26", 134125, 135050),
               ("26", 178899, 180227)]
        locations = [cpg.Location for cpg in cpgs1]
        obs = [(str(loc.CoordName), loc.Start, loc.End)
               for loc in locations]
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at scaffold level:
        coord2 = dict(CoordName='JH376196.1', Start=1, End=14640)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 3)
class TestFeatureCoordLevels(TestCase):
    """Feature coordinate levels and feature queries on the chicken genome."""

    def setUp(self):
        """connect to the chicken genome used by every test"""
        self.chicken = Genome(Species='chicken', Release=Release,
                              account=account)

    def test_feature_levels(self):
        """should report the expected levels for repeat and cpg features"""
        ChickenFeatureLevels = FeatureCoordLevels('chicken')
        chicken_feature_levels = ChickenFeatureLevels(
            feature_types=['gene', 'cpg', 'est'],
            core_db=self.chicken.CoreDb,
            otherfeature_db=self.chicken.OtherFeaturesDb)
        # repeat levels are compared as an ordered list, cpg levels as a set
        self.assertEqual(chicken_feature_levels['repeat'].levels,
                         ['contig'])
        # no backslash continuation needed inside the call parentheses
        self.assertEqual(set(chicken_feature_levels['cpg'].levels),
                         set(['contig', 'supercontig', 'chromosome']))

    def test_repeat(self):
        """should return the expected repeat features for a region"""
        # use chicken genome as it needs to do conversion
        # chicken coordinates corresponding to the RefSeq human IL2A region
        coord = dict(CoordName=9, Start=23817146, End=23818935)
        region = self.chicken.getRegion(**coord)
        # repeat is recorded at contig level, strand is 0
        repeats = region.getFeatures(feature_types='repeat')
        expect = [("9", 23817293, 23817321),
                  ("9", 23817803, 23817812),
                  ("9", 23817963, 23817972)]
        locations = [repeat.Location for repeat in repeats]
        obs = [(loc.CoordName, loc.Start, loc.End) for loc in locations]
        self.assertEqual(set(obs), set(expect))

    def test_cpg(self):
        """should return the expected cpg features at both levels"""
        # contains 3 CpG islands recorded at chromosome level
        coord1 = dict(CoordName=26, Start=110000, End=190000)
        cpgs1 = self.chicken.getFeatures(feature_types='cpg', **coord1)
        exp = [("26", 116969, 117955),
               ("26", 139769, 140694),
               ("26", 184546, 185881)]
        locations = [cpg.Location for cpg in cpgs1]
        obs = [(loc.CoordName, loc.Start, loc.End) for loc in locations]
        # observed first, expected second, matching test_repeat
        self.assertEqual(set(obs), set(exp))

        # test cpg features recorded at supercontig level:
        coord2 = dict(CoordName='Un_random', Start=29434117, End=29439117)
        cpgs2 = self.chicken.getFeatures(feature_types='cpg', **coord2)
        self.assertEqual(len(list(cpgs2)), 1)
def _attach_genomes(self):
    """build a Genome for each entry of self.Species and register it

    Each genome is stored in self._genomes under its species entry and is
    also set on self as an attribute whose name comes from
    _Species.getComparaName.
    """
    for species_name in self.Species:
        compara_attr = _Species.getComparaName(species_name)
        species_genome = Genome(
            Species=species_name,
            Release=self.Release,
            account=self._account,
        )
        self._genomes[species_name] = species_genome
        setattr(self, compara_attr, species_genome)
def setUp(self):
    """connect to the chicken genome and keep it on the instance"""
    self.chicken = Genome(
        Species='chicken',
        Release=Release,
        account=account,
    )
class TestGenome(GenomeTestBase):
    """Tests of Genome queries: genes, ESTs, regions and distinct values."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000035043"""
        est = self.human.getEstMatching(StableId='ENSESTG00000035043')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=8,
                                      Start=121470000, End=121600000)
        # a distinct loop name avoids rebinding `est` above
        stable_ids = [record.StableId for record in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 249250621)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom, Start=start, End=end,
                                      ensembl_coord=True)
        # with ensembl_coord=True the stored Start is one less than given
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        # old: chrY:57767412-57767433; new: chrY:59358024-59358045
        region = self.human.getRegion(CoordName="Y", Start=59358024,
                                      End=59358045, Strand=1,
                                      ensembl_coord=True)
        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # Release is hard-coded to 58 and no account is passed here.
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Passes if construction does not raise; the result is not needed.
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        self.gorilla = Genome(Species="gorilla", Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is only assigned in test_gorilla within
        # this class; presumably GenomeTestBase also provides it -- confirm.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        self.assertEqual(
            str(cds),
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all
        genomes"""
        # Passes if no call raises; the returned values are not inspected.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all
        genomes"""
        # Passes if no call raises; the returned values are not inspected.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')
__email__ = "*****@*****.**"
__status__ = "alpha"

# Ensembl release used for every genome created below.
Release = 76

# Use the server described by ENSEMBL_ACCOUNT when it is set; its value is
# split on whitespace into host, username, password and an optional port.
# Otherwise fall back to the account returned for this release.
if 'ENSEMBL_ACCOUNT' not in os.environ:
    account = get_ensembl_account(release=Release)
else:
    args = os.environ['ENSEMBL_ACCOUNT'].split()
    host, username, password = args[0:3]
    kwargs = {}
    if len(args) > 3:
        kwargs['port'] = int(args[3])
    account = HostAccount(host, username, password, **kwargs)

human = Genome(Species='human', Release=Release, account=account)
platypus = Genome(Species='platypus', Release=Release, account=account)


class TestLocation(TestCase):
    """Checks on Coordinate construction."""

    def test_init(self):
        """a Coordinate should expose the values it was built with"""
        human_loc = Coordinate(
            CoordName='x',
            Start=1000,
            End=10000,
            Strand=-1,
            genome=human,
        )
        # TODO: complete test for platypus
        self.assertEqual(human_loc.CoordType, 'chromosome')
        self.assertEqual(human_loc.CoordName, 'x')
        self.assertEqual(human_loc.Start, 1000)
        self.assertEqual(human_loc.End, 10000)
class GenomeTestBase(TestCase):
    """Shared fixtures: genomes and one human gene built at class creation.

    Every attribute below is evaluated once, when the class body runs, and
    is then available to all subclasses as a class attribute.
    """

    human = Genome(
        Species="human", Release=Release, account=account)
    mouse = Genome(
        Species="mouse", Release=Release, account=account)
    rat = Genome(
        Species="rat", Release=Release, account=account)
    macaq = Genome(
        Species="macaque", Release=Release, account=account)

    # First gene matching the stable id on the human genome defined above.
    brca2 = list(
        human.getGenesMatching(StableId="ENSG00000139618")
    )[0]
def test_pool_connection(self):
    """exercising ability to specify pool connection"""
    # Passes if construction with pool_recycle does not raise. The
    # instance itself is never used, so it is not bound to a name.
    Genome(Species="dog", Release=Release, account=account,
           pool_recycle=1000)
def test_genome_comparison(self):
    """different genome instances with same CoreDb connection are equal"""
    # A second human genome built with the same release and account.
    second_human = Genome(Species='human', Release=Release, account=account)
    # assertEqual is used in place of the deprecated assertEquals alias.
    self.assertEqual(self.human, second_human)
class TestGenome(GenomeTestBase):
    """Tests of Genome queries: genes, ESTs, regions and distinct values."""

    def test_other_features(self):
        """should correctly return record for ENSESTG00000000010"""
        est = self.human.getEstMatching(StableId='ENSESTG00000000010')
        direct = list(est)[0]
        ests = self.human.getFeatures(feature_types='est', CoordName=6,
                                      Start=99994000, End=100076519)
        # a distinct loop name avoids rebinding `est` above
        stable_ids = [record.StableId for record in ests]
        self.assertContains(stable_ids, direct.StableId)

    def test_genome_comparison(self):
        """different genome instances with same CoreDb connection are equal"""
        h2 = Genome(Species='human', Release=Release, account=account)
        self.assertEqual(self.human, h2)

    def test_make_location(self):
        """should correctly make a location for an entire chromosome"""
        loc = self.human.makeLocation(CoordName=1)
        self.assertEqual(len(loc), 248956422)

    def test_get_region(self):
        """should return a generic region that extracts correct sequence"""
        chrom = 1
        start = 11137
        end = start + 20
        region = self.human.getRegion(CoordName=chrom, Start=start, End=end,
                                      ensembl_coord=True)
        # with ensembl_coord=True the stored Start is one less than given
        self.assertEqual(region.Location.Start, start - 1)
        self.assertEqual(region.Location.End, end)
        self.assertEqual(region.Location.CoordName, str(chrom))
        self.assertEqual(region.Location.CoordType, 'chromosome')
        self.assertEqual(region.Seq, 'ACCTCAGTAATCCGAAAAGCC')

    def test_get_assembly_exception_region(self):
        """should return correct sequence for region with an assembly
        exception"""
        region = self.human.getRegion(CoordName="Y", Start=57211873,
                                      End=57211894, Strand=1,
                                      ensembl_coord=True)
        self.assertEqual(str(region.Seq), 'CGAGGACGACTGGGAATCCTAG')

    def test_no_assembly(self):
        """return N's for coordinates with no assembly"""
        # Release is hard-coded to 58 and no account is passed here.
        krat = Genome('Kangaroo rat', Release=58)
        start = 24385
        end = start + 100
        region = krat.getRegion(CoordName='scaffold_13754', Start=start,
                                End=end)
        self.assertEqual(str(region.Seq), 'N' * (end - start))

    def test_getting_annotated_seq(self):
        """a region should return a sequence with the correct annotation"""
        new_loc = self.brca2.Location.resized(-100, 100)
        region = self.human.getRegion(region=new_loc)
        annot_seq = region.getAnnotatedSeq(feature_types='gene')
        gene_annots = annot_seq.getAnnotationsMatching('gene')
        self.assertEqual(gene_annots[0].Name, self.brca2.Symbol)

    def test_correct_feature_type_id_cache(self):
        """should obtain the feature type identifiers without failure"""
        self.assertNotEqual(self.human._feature_type_ids.CpGisland, None)

    def test_strand_conversion(self):
        """should consistently convert strand info"""
        self.assertEqual(convert_strand(None), 1)
        self.assertEqual(convert_strand(-1), -1)
        self.assertEqual(convert_strand(1), 1)
        self.assertEqual(convert_strand('-'), -1)
        self.assertEqual(convert_strand('+'), 1)
        self.assertEqual(convert_strand(-1.0), -1)
        self.assertEqual(convert_strand(1.0), 1)

    def test_pool_connection(self):
        """exercising ability to specify pool connection"""
        # Passes if construction does not raise; the result is not needed.
        Genome(Species="dog", Release=Release, account=account,
               pool_recycle=1000)

    def test_gorilla(self):
        """should correctly return a gorilla gene"""
        self.gorilla = Genome(Species="gorilla", Release=Release,
                              account=account)
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000005730')
        self.assertEqual(str(gene.Seq[:10]), 'TGGGAGTCCA')

    def test_diff_strand_contig_chrom(self):
        """get correct sequence when contig and chromosome strands differ"""
        # NOTE(review): self.gorilla is only assigned in test_gorilla within
        # this class; presumably GenomeTestBase also provides it -- confirm.
        gene = self.gorilla.getGeneByStableId('ENSGGOG00000001953')
        cds = gene.CanonicalTranscript.Cds
        self.assertEqual(
            str(cds),
            'ATGGCCCAGGATCTCAGCGAGAAGGACCTGTTGAAGATG'
            'GAGGTGGAGCAGCTGAAGAAAGAAGTGAAAAACACAAGAATTCCGATTTCCAAAGCGGGAAAGGAAAT'
            'CAAAGAGTACGTGGAGGCCCAAGCAGGAAACGATCCTTTTCTCAAAGGCATCCCTGAGGACAAGAATC'
            'CCTTCAAGGAGAAAGGTGGCTGTCTGATAAGCTGA')

    def test_get_distinct_biotype(self):
        """Genome instance getDistinct for biotype should work on all
        genomes"""
        # Passes if no call raises; the returned values are not inspected.
        for genome in (self.gorilla, self.human, self.mouse, self.rat,
                       self.macaq):
            genome.getDistinct('biotype')

    def test_get_distinct_effect(self):
        """Genome instance getDistinct for SNP effect should work on all
        genomes"""
        # Passes if no call raises; the returned values are not inspected.
        for genome in (self.human, self.mouse, self.rat, self.macaq):
            genome.getDistinct('effect')