def test_adopted(self):
    """adopted() should take the identity of the provided coordinate while
    keeping the span and strand of the coordinate it was called on"""
    own = Coordinate(genome=human, CoordName='1', Start=1000,
                     End=1000000, Strand=1)
    other = Coordinate(genome=human, CoordName='2', Start=2000,
                       End=2000000, Strand=1)

    merged = own.adopted(other)
    # identity attributes are expected to match the adopted coordinate
    for attr in ('CoordName', 'CoordType', 'seq_region_id'):
        self.assertEqual(getattr(merged, attr), getattr(other, attr))
    # positional attributes are expected to match the calling coordinate
    for attr in ('Start', 'End', 'Strand'):
        self.assertEqual(getattr(merged, attr), getattr(own, attr))

    # an explicit shift should offset both ends by the same amount
    merged = own.adopted(other, shift=100)
    self.assertEqual(merged.Start, own.Start + 100)
    self.assertEqual(merged.End, own.End + 100)
def test_coord_shift(self):
    """shifted() should offset both ends and return a new coordinate"""
    origin = Coordinate(genome=human, CoordName='1', Start=1000,
                        End=1000000, Strand=1)
    # exercise both a forward and a backward shift
    for offset in (100, -100):
        moved = origin.shifted(offset)
        self.assertEqual(moved.Start, origin.Start + offset)
        self.assertEqual(moved.End, origin.End + offset)
        # the genome is shared, but the coordinate itself is a new object
        self.assertEqual(id(origin.genome), id(moved.genome))
        self.assertNotEqual(id(origin), id(moved))
def test_coord_resize(self):
    """resized() should move each end by its own offset"""
    original = Coordinate(genome=human, CoordName='1', Start=1000,
                          End=1000000, Strand=1)
    # a negative front shift and positive back shift both lengthen the span
    front_delta, back_delta = -100, 100
    enlarged = original.resized(front_delta, back_delta)

    self.assertEqual(len(enlarged), len(original) + 200)
    self.assertEqual(enlarged.Start, original.Start + front_delta)
    self.assertEqual(enlarged.End, original.End + back_delta)
    self.assertEqual(original.Strand, enlarged.Strand)
def test_coord_shift(self):
    """shifting a coordinate should translate its Start and End"""
    base = Coordinate(CoordName='1', Start=1000, End=1000000, Strand=1,
                      genome=human)
    deltas = [100, -100]
    for delta in deltas:
        translated = base.shifted(delta)
        # both ends move by the same signed amount
        self.assertEqual(translated.Start, base.Start + delta)
        self.assertEqual(translated.End, base.End + delta)
        # the result refers to the same genome but is a distinct object
        self.assertEqual(id(base.genome), id(translated.genome))
        self.assertNotEqual(id(base), id(translated))
def test_coord_resize(self):
    """resizing should extend the coordinate at the front and the back"""
    before = Coordinate(CoordName='1', Start=1000, End=1000000, Strand=1,
                        genome=human)
    front_shift = -100
    back_shift = 100
    after = before.resized(front_shift, back_shift)

    # 100 added at each end, strand untouched
    self.assertEqual(len(after), len(before) + 200)
    self.assertEqual(after.Start, before.Start + front_shift)
    self.assertEqual(after.End, before.End + back_shift)
    self.assertEqual(before.Strand, after.Strand)
def _get_gene_features(self, db, klass, target_coord, query_coord,
                       where_feature):
    """yields a klass instance for each gene record on query_coord

    Arguments:
        - db: database providing the 'gene' table, plus 'xref' when its
          Type is 'core' and 'gene_stable_id' for releases before 65
        - klass: called as klass(self, db, Location=..., data=record) to
          build each yielded feature
        - target_coord: not used by this method
        - query_coord: its seq_region_id, EnsemblStart and EnsemblEnd
          restrict the query; its CoordName names each new Coordinate
        - where_feature: passed to location_query as its where argument
    """
    # only core databases contribute an xref table; the conditional
    # expression avoids fetching the table just to discard it
    xref_table = db.getTable('xref') if db.Type == 'core' else None
    gene_table = db.getTable('gene')

    # from release 65 on, the gene_stable_id table is not used. The
    # following is to maintain support for earlier releases.
    if self.GeneralRelease >= 65:
        gene_id_table = None
    else:
        gene_id_table = db.getTable('gene_stable_id')

    # note gene records are at chromosome, not contig, level
    condition = gene_table.c.seq_region_id == query_coord.seq_region_id
    query = self._build_gene_query(db, condition, gene_table,
                                   gene_id_table, xref_table)
    query = location_query(gene_table, query_coord.EnsemblStart,
                           query_coord.EnsemblEnd, query=query,
                           where=where_feature)

    for record in query.execute():
        # record positions are passed on as Ensembl coordinates
        location = Coordinate(self, CoordName=query_coord.CoordName,
                              Start=record['seq_region_start'],
                              End=record['seq_region_end'],
                              Strand=record['seq_region_strand'],
                              seq_region_id=record['seq_region_id'],
                              ensembl_coord=True)
        yield klass(self, db, Location=location, data=record)
def getRegion(self, region=None, CoordName=None, Start=None, End=None,
              Strand=None, ensembl_coord=False):
    """returns a single generic region for the specified coordinates

    Arguments:
        - region: a genomic region or a Coordinate instance. When not
          provided, a Coordinate is built from CoordName, Start, End and
          Strand
        - ensembl_coord: if True, follows indexing system of Ensembl
          where indexing starts at 1"""
    if region is not None:
        # an object exposing a Location is replaced by that location
        if hasattr(region, 'Location'):
            region = region.Location
    else:
        seq_region_id = self._get_seq_region_id(CoordName)
        strand = convert_strand(Strand)
        region = Coordinate(self, CoordName=CoordName, Start=Start,
                            End=End, Strand=strand,
                            seq_region_id=seq_region_id,
                            ensembl_coord=ensembl_coord)

    generic = GenericRegion(self, self.CoreDb, CoordName=CoordName,
                            Start=Start, End=End, Strand=Strand,
                            Location=region, ensembl_coord=ensembl_coord)
    return generic
def _get_repeat_features(self, db, klass, target_coord, query_coord,
                         where_feature):
    """yields a klass instance for each repeat_feature record on query_coord

    Each location is built from the record's seq_region columns. When
    query_coord and target_coord have different CoordName values, the
    location is converted to target_coord's CoordType using the core db.
    """
    # repeats are built using coordinates from the repeat_feature table;
    # the repeat_consensus_id is required to get the repeat name, class
    # and type
    table = db.getTable('repeat_feature')
    on_seq_region = table.c.seq_region_id == query_coord.seq_region_id
    base_query = sql.select([table], on_seq_region)
    query = location_query(table, query_coord.EnsemblStart,
                           query_coord.EnsemblEnd, query=base_query,
                           where=where_feature)

    for row in query.execute():
        location = Coordinate(self, CoordName=query_coord.CoordName,
                              Start=row['seq_region_start'],
                              End=row['seq_region_end'],
                              Strand=row['seq_region_strand'],
                              seq_region_id=row['seq_region_id'],
                              ensembl_coord=True)
        if query_coord.CoordName != target_coord.CoordName:
            # exactly one conversion is expected; keep its second member
            converted = get_coord_conversion(location,
                                             target_coord.CoordType,
                                             self.CoreDb)
            location = asserted_one(converted)[1]

        # TODO: fix here if query_coord and target_coord have different
        # coordName (calls to coord.makeRelativeTo were previously tried
        # here and left disabled)
        yield klass(self, db, Location=location, Score=row['score'],
                    data=row)
def _make_coord(genome, coord_name, start, end, strand):
    """returns a Coordinate for genome with the given name, span and strand"""
    coord = Coordinate(genome=genome, CoordName=coord_name, Start=start,
                       End=end, Strand=strand)
    return coord
def getFeatures(self, region=None, feature_types=None, where_feature=None,
                CoordName=None, Start=None, End=None, Strand=None,
                ensembl_coord=False):
    """yields Region instances of the requested types for a location

    Arguments:
        - region: a Coordinate, or an object with a Location attribute.
          When None, a Coordinate is built from CoordName, Start, End
          and Strand
        - feature_types: a feature type name, or a series of names; case
          is ignored. Valid names are 'cpg', 'repeat', 'gene', 'est' and
          'variation'. Must be provided: the None default fails when
          iterated
        - where_feature: passed on to get_coord_conversion and to the
          per-type query method
        - ensembl_coord: passed to Coordinate when one is built here

    Raises a RuntimeError if feature_types includes an unknown name. As
    this is a generator, nothing is evaluated (or raised) until iteration
    starts."""
    if isinstance(feature_types, str):
        feature_types = [feature_types]

    feature_types = [ft.lower() for ft in feature_types]
    feature_coord_levels = self._get_feature_coord_levels(feature_types)

    if region is None:
        seq_region_id = self._get_seq_region_id(CoordName)
        region = Coordinate(self, CoordName=CoordName, Start=Start,
                            End=End, Strand=convert_strand(Strand),
                            seq_region_id=seq_region_id,
                            ensembl_coord=ensembl_coord)
    elif hasattr(region, 'Location'):
        region = region.Location

    coord = region
    # the processing function and the class of the returned instances,
    # for each feature type
    target_coords_funcs = dict(
        cpg=(self._get_simple_features, CpGisland),
        repeat=(self._get_repeat_features, Repeat),
        gene=(self._get_gene_features, Gene),
        est=(self._get_gene_features, Est),
        variation=(self._get_variation_features, Variation))

    known_types = set(target_coords_funcs)
    # set difference, so only the offending names are reported
    unknown_types = set(feature_types) - known_types
    if unknown_types:
        raise RuntimeError(
            'Unknown feature[%s], valid feature_types are: %s'
            % (', '.join(sorted(unknown_types)),
               ', '.join(sorted(known_types))))

    for feature_type in feature_types:
        target_func, target_class = target_coords_funcs[feature_type]
        db = self.CoreDb
        if feature_type == 'est':
            db = self.OtherFeaturesDb

        feature_coords = feature_coord_levels[feature_type].levels
        for feature_coord in feature_coords:
            chrom_other_coords = get_coord_conversion(coord, feature_coord,
                                                      db,
                                                      where=where_feature)
            for chrom_coord, other_coord in chrom_other_coords:
                # named feature so the region argument is not shadowed
                for feature in target_func(db, target_class, chrom_coord,
                                           other_coord, where_feature):
                    yield feature
def test_adopted(self):
    """a coordinate should correctly adopt the seq_region_id properties
    of the provided coordinate"""
    source = Coordinate(CoordName='1', Start=1000, End=1000000, Strand=1,
                        genome=human)
    donor = Coordinate(CoordName='2', Start=2000, End=2000000, Strand=1,
                       genome=human)

    result = source.adopted(donor)
    # naming and seq_region_id are expected to come from the donor
    self.assertEqual(result.CoordName, donor.CoordName)
    self.assertEqual(result.CoordType, donor.CoordType)
    self.assertEqual(result.seq_region_id, donor.seq_region_id)
    # the span and strand are expected to remain those of the source
    self.assertEqual(result.Start, source.Start)
    self.assertEqual(result.End, source.End)
    self.assertEqual(result.Strand, source.Strand)

    # with a shift, both ends are offset relative to the source
    result = source.adopted(donor, shift=100)
    self.assertEqual(result.Start, source.Start + 100)
    self.assertEqual(result.End, source.End + 100)
def makeLocation(self, CoordName, Start=None, End=None, Strand=1,
                 ensembl_coord=False):
    """returns a Coordinate for a location in the genome

    All arguments are passed on unchanged to Coordinate."""
    location = Coordinate(self, CoordName=CoordName, Start=Start, End=End,
                          Strand=Strand, ensembl_coord=ensembl_coord)
    return location
def test_init(self):
    """a new Coordinate should report the expected attribute values"""
    human_loc = Coordinate(CoordName='x', Start=1000, End=10000,
                           Strand=-1, genome=human)
    # TODO: complete test for platypus
    self.assertEqual(human_loc.CoordType, 'chromosome')
    self.assertEqual(human_loc.CoordName, 'x')
    self.assertEqual(human_loc.Start, 1000)
    self.assertEqual(human_loc.End, 10000)
    self.assertEqual(human_loc.Strand, -1)
    # the expected species literal had been garbled to "H**o sapiens"
    self.assertEqual(human_loc.Species, "Homo sapiens")
    self.assertEqual(human_loc.seq_region_id, 131539)
def test_get_coord_conversion(self):
    """should correctly map between different coordinate levels"""
    # not really testing the contig coordinates are correct
    CoordName, Start, End, Strand = '1', 1000, 1000000, 1
    human_loc = Coordinate(CoordName=CoordName, Start=Start, End=End,
                           Strand=Strand, genome=human)
    results = get_coord_conversion(human_loc, 'contig', human.CoreDb)
    for result in results:
        # the first member of each result is checked against the query:
        # same name and strand, and a span within the queried one
        converted = result[0]
        # assertEqual reports both values when the comparison fails
        self.assertEqual(converted.CoordName, CoordName)
        self.assertTrue(converted.Start >= Start)
        self.assertTrue(converted.End <= End)
        self.assertEqual(converted.Strand, Strand)
def _get_simple_features(self, db, klass, target_coord, query_coord,
                         where_feature):
    """yields a klass instance for each matching simple_feature record

    Records are selected for query_coord from the simple_feature table and
    the returned coord is referenced to target_coord. At present, only CpG
    islands are queried."""
    table = db.getTable('simple_feature')
    wanted_types = ('CpGisland',)
    analysis_ids = [self._feature_type_ids.get(name)
                    for name in wanted_types]

    # TODO: fix the following (flagged in the original code, no details)
    matches_type = table.c.analysis_id.in_(analysis_ids)
    on_seq_region = table.c.seq_region_id == query_coord.seq_region_id
    base_query = sql.select([table], sql.and_(matches_type, on_seq_region))
    query = location_query(table, query_coord.EnsemblStart,
                           query_coord.EnsemblEnd, query=base_query,
                           where=where_feature)

    for row in query.execute():
        location = Coordinate(self, CoordName=query_coord.CoordName,
                              Start=row['seq_region_start'],
                              End=row['seq_region_end'],
                              Strand=row['seq_region_strand'],
                              seq_region_id=row['seq_region_id'],
                              ensembl_coord=True)
        if query_coord.CoordName != target_coord.CoordName:
            # exactly one conversion is expected; keep its second member
            converted = get_coord_conversion(location,
                                             target_coord.CoordType,
                                             self.CoreDb)
            location = asserted_one(converted)[1]

        # TODO: fix here if query_coord and target_coord have different
        # coordName (calls to coord.makeRelativeTo were previously tried
        # here and left disabled)
        yield klass(self, db, Location=location, Score=row['score'])