Exemplo n.º 1
0
 def test_adopted(self):
     """adopted() should take the seq_region identity of the supplied
     coordinate while keeping the span and strand of the original"""
     source = Coordinate(CoordName='1',
                         Start=1000,
                         End=1000000,
                         Strand=1,
                         genome=human)
     donor = Coordinate(CoordName='2',
                        Start=2000,
                        End=2000000,
                        Strand=1,
                        genome=human)

     adopted = source.adopted(donor)
     # identity attributes are expected to come from the donor ...
     for attr in ('CoordName', 'CoordType', 'seq_region_id'):
         self.assertEqual(getattr(adopted, attr), getattr(donor, attr))
     # ... whereas span and orientation are expected to match the source
     for attr in ('Start', 'End', 'Strand'):
         self.assertEqual(getattr(adopted, attr), getattr(source, attr))

     # an explicit shift is expected to offset both ends of the span
     offset = 100
     moved = source.adopted(donor, shift=offset)
     self.assertEqual(moved.Start, source.Start + offset)
     self.assertEqual(moved.End, source.End + offset)
Exemplo n.º 2
0
 def test_coord_shift(self):
     """shifted() should offset both ends and share the genome instance"""
     origin = Coordinate(CoordName='1',
                         Start=1000,
                         End=1000000,
                         Strand=1,
                         genome=human)
     # exercise both a downstream and an upstream move
     for offset in (100, -100):
         moved = origin.shifted(offset)
         self.assertEqual(moved.Start, origin.Start + offset)
         self.assertEqual(moved.End, origin.End + offset)
         # the genome must be the very same object, not a copy
         self.assertEqual(id(origin.genome), id(moved.genome))
     # checked once, against the result of the final shift only
     self.assertNotEqual(id(origin), id(moved))
Exemplo n.º 3
0
 def test_coord_resize(self):
     """resized() should move each end by its own amount, keeping strand"""
     original = Coordinate(CoordName='1',
                           Start=1000,
                           End=1000000,
                           Strand=1,
                           genome=human)
     # a negative front value and positive back value both enlarge the span
     grow_front, grow_back = -100, 100
     enlarged = original.resized(grow_front, grow_back)
     self.assertEqual(len(enlarged), len(original) + 200)
     self.assertEqual(enlarged.Start, original.Start + grow_front)
     self.assertEqual(enlarged.End, original.End + grow_back)
     self.assertEqual(original.Strand, enlarged.Strand)
Exemplo n.º 4
0
 def test_coord_shift(self):
     """shifting a coordinate should move Start and End by the same amount"""
     settings = dict(CoordName='1', Start=1000, End=1000000, Strand=1)
     base = Coordinate(genome=human, **settings)
     for delta in [100, -100]:
         result = base.shifted(delta)
         self.assertEqual(result.Start, base.Start + delta)
         self.assertEqual(result.End, base.End + delta)
         # same genome object is expected on the shifted coordinate
         self.assertEqual(id(base.genome), id(result.genome))
     # runs after the loop, so only the last shifted instance is compared
     self.assertNotEqual(id(base), id(result))
Exemplo n.º 5
0
 def test_coord_resize(self):
     """resizing should change the length and ends but not the strand"""
     settings = dict(CoordName='1', Start=1000, End=1000000, Strand=1)
     before = Coordinate(genome=human, **settings)
     front_delta = -100
     back_delta = 100
     after = before.resized(front_delta, back_delta)
     # 100 added at each end gives 200 extra positions overall
     self.assertEqual(len(after), len(before) + 200)
     self.assertEqual(after.Start, before.Start + front_delta)
     self.assertEqual(after.End, before.End + back_delta)
     self.assertEqual(before.Strand, after.Strand)
Exemplo n.º 6
0
    def _get_gene_features(self, db, klass, target_coord, query_coord,
                           where_feature):
        """yields a klass instance for each gene record selected for
        query_coord

        Arguments:
            - db: database object providing the gene tables via getTable
            - klass: class used to wrap each record, called as
              klass(self, db, Location=..., data=record)
            - target_coord: not used by this method
            - query_coord: coordinate whose seq_region_id, EnsemblStart and
              EnsemblEnd define the query
            - where_feature: passed to location_query as its where argument
        """
        # the xref table is only used for core databases, so only load it
        # in that case rather than loading it and discarding the result
        xref_table = db.getTable('xref') if db.Type == 'core' else None
        gene_table = db.getTable('gene')

        # after release 65, the gene_id_table is removed. The following is
        # to maintain support for earlier releases.
        if self.GeneralRelease >= 65:
            gene_id_table = None
        else:
            gene_id_table = db.getTable('gene_stable_id')

        # note gene records are at chromosome, not contig, level
        condition = gene_table.c.seq_region_id == query_coord.seq_region_id
        query = self._build_gene_query(db, condition, gene_table,
                                       gene_id_table, xref_table)
        query = location_query(gene_table,
                               query_coord.EnsemblStart,
                               query_coord.EnsemblEnd,
                               query=query,
                               where=where_feature)

        for record in query.execute():
            # record positions are handed over as Ensembl coordinates
            location = Coordinate(self,
                                  CoordName=query_coord.CoordName,
                                  Start=record['seq_region_start'],
                                  End=record['seq_region_end'],
                                  Strand=record['seq_region_strand'],
                                  seq_region_id=record['seq_region_id'],
                                  ensembl_coord=True)

            yield klass(self, db, Location=location, data=record)
Exemplo n.º 7
0
    def getRegion(self,
                  region=None,
                  CoordName=None,
                  Start=None,
                  End=None,
                  Strand=None,
                  ensembl_coord=False):
        """returns a single generic region for the specified coordinates

        Arguments:
            - region: a genomic region (an object with a Location
              attribute) or a Coordinate instance. If None, a Coordinate is
              built from CoordName, Start, End and Strand.
            - CoordName, Start, End, Strand: used to build the location
              when region is None, and always passed to GenericRegion
            - ensembl_coord: if True, follows indexing system of Ensembl
              where indexing starts at 1"""
        if region is not None:
            # objects carrying a Location contribute that; anything else is
            # used as given
            if hasattr(region, 'Location'):
                region = region.Location
        else:
            region_id = self._get_seq_region_id(CoordName)
            region = Coordinate(self,
                                CoordName=CoordName,
                                Start=Start,
                                End=End,
                                Strand=convert_strand(Strand),
                                seq_region_id=region_id,
                                ensembl_coord=ensembl_coord)

        # note Strand is handed on as supplied; only the Coordinate built
        # above receives the convert_strand() result
        settings = dict(CoordName=CoordName,
                        Start=Start,
                        End=End,
                        Strand=Strand,
                        Location=region,
                        ensembl_coord=ensembl_coord)
        return GenericRegion(self, self.CoreDb, **settings)
Exemplo n.º 8
0
 def _get_repeat_features(self, db, klass, target_coord, query_coord,
                          where_feature):
     """yields a klass instance for each repeat_feature record selected
     for query_coord

     Arguments:
         - db: database object providing the repeat_feature table
         - klass: class used to wrap each record, called as
           klass(self, db, Location=..., Score=..., data=record)
         - target_coord: coordinate whose CoordType the location is
           converted to when its CoordName differs from query_coord's
         - query_coord: coordinate defining the seq_region and span queried
         - where_feature: passed to location_query as its where argument
     """
     # locations are built from the repeat_feature table; the
     # repeat_consensus_id is required to get the repeat name, class
     # and type
     table = db.getTable('repeat_feature')
     on_region = table.c.seq_region_id == query_coord.seq_region_id
     selection = sql.select([table], on_region)
     selection = location_query(table,
                                query_coord.EnsemblStart,
                                query_coord.EnsemblEnd,
                                query=selection,
                                where=where_feature)
     for row in selection.execute():
         location = Coordinate(self,
                               CoordName=query_coord.CoordName,
                               Start=row['seq_region_start'],
                               End=row['seq_region_end'],
                               seq_region_id=row['seq_region_id'],
                               Strand=row['seq_region_strand'],
                               ensembl_coord=True)
         if query_coord.CoordName != target_coord.CoordName:
             # exactly one conversion is expected; its second element is
             # the location used from here on
             converted = get_coord_conversion(location,
                                              target_coord.CoordType,
                                              self.CoreDb)
             location = asserted_one(converted)[1]
         # TODO: fix here if query_coord and target_coord have different
         # CoordName; making the location relative to query_coord and
         # target_coord (makeRelativeTo) is currently disabled
         yield klass(self,
                     db,
                     Location=location,
                     Score=row['score'],
                     data=row)
Exemplo n.º 9
0
def _make_coord(genome, coord_name, start, end, strand):
    """returns a Coordinate on genome built from the given values"""
    settings = dict(CoordName=coord_name,
                    Start=start,
                    End=end,
                    Strand=strand,
                    genome=genome)
    return Coordinate(**settings)
Exemplo n.º 10
0
    def getFeatures(self,
                    region=None,
                    feature_types=None,
                    where_feature=None,
                    CoordName=None,
                    Start=None,
                    End=None,
                    Strand=None,
                    ensembl_coord=False):
        """yields Region instances for the specified location

        Arguments:
            - region: a genomic region (an object with a Location
              attribute) or a Coordinate instance. If None, a Coordinate is
              built from CoordName, Start, End and Strand.
            - feature_types: a feature type name, or a series of them.
              Names are lower-cased; valid values are 'cpg', 'repeat',
              'gene', 'est' and 'variation'.
            - where_feature: passed through to the coordinate conversion
              and to the per-type feature query
            - CoordName, Start, End, Strand, ensembl_coord: used to build
              the location when region is None

        Raises TypeError if feature_types is not provided and RuntimeError
        if any feature type is unknown. As this is a generator, errors are
        raised when iteration starts."""
        if feature_types is None:
            raise TypeError('feature_types is required: provide a feature '
                            'type name or a series of names')

        if isinstance(feature_types, str):
            feature_types = [feature_types]
        feature_types = [ft.lower() for ft in feature_types]
        feature_coord_levels = self._get_feature_coord_levels(feature_types)

        if region is None:
            seq_region_id = self._get_seq_region_id(CoordName)
            region = Coordinate(self,
                                CoordName=CoordName,
                                Start=Start,
                                End=End,
                                Strand=convert_strand(Strand),
                                seq_region_id=seq_region_id,
                                ensembl_coord=ensembl_coord)
        elif hasattr(region, 'Location'):
            region = region.Location

        coord = region
        # for each feature type: the function that queries it and the
        # class used to represent the results
        target_coords_funcs = \
            dict(cpg = (self._get_simple_features, CpGisland),
                 repeat = (self._get_repeat_features, Repeat),
                 gene = (self._get_gene_features, Gene),
                 est = (self._get_gene_features, Est),
                 variation = (self._get_variation_features, Variation))

        known_types = set(target_coords_funcs.keys())
        # set difference, so only the requested-but-invalid names are
        # reported (symmetric difference would list unrequested valid ones)
        unknown_types = set(feature_types) - known_types
        if unknown_types:
            raise RuntimeError(
                'Unknown feature[%s], valid feature_types are: %s' %
                (', '.join(sorted(unknown_types)),
                 ', '.join(sorted(known_types))))

        for feature_type in feature_types:
            target_func, target_class = target_coords_funcs[feature_type]
            db = self.CoreDb
            if feature_type == 'est':
                # est features are read from the other-features database
                db = self.OtherFeaturesDb

            feature_coords = feature_coord_levels[feature_type].levels
            for feature_coord in feature_coords:
                chrom_other_coords = get_coord_conversion(coord,
                                                          feature_coord,
                                                          db,
                                                          where=where_feature)
                for chrom_coord, other_coord in chrom_other_coords:
                    # named feature so the region argument is not shadowed
                    for feature in target_func(db, target_class, chrom_coord,
                                               other_coord, where_feature):
                        yield feature
Exemplo n.º 11
0
 def test_adopted(self):
     """an adopted coordinate should carry the other coordinate's
     seq_region properties and its own span and strand"""
     first = dict(CoordName='1', Start=1000, End=1000000, Strand=1)
     second = dict(CoordName='2', Start=2000, End=2000000, Strand=1)
     c1 = Coordinate(genome=human, **first)
     c2 = Coordinate(genome=human, **second)

     plain = c1.adopted(c2)
     # properties expected to be taken over from c2
     self.assertEqual(plain.CoordName, c2.CoordName)
     self.assertEqual(plain.CoordType, c2.CoordType)
     self.assertEqual(plain.seq_region_id, c2.seq_region_id)
     # properties expected to be retained from c1
     self.assertEqual(plain.Start, c1.Start)
     self.assertEqual(plain.End, c1.End)
     self.assertEqual(plain.Strand, c1.Strand)

     # with a shift, both ends are expected to move by that amount
     shifted = c1.adopted(c2, shift=100)
     self.assertEqual(shifted.Start, c1.Start + 100)
     self.assertEqual(shifted.End, c1.End + 100)
Exemplo n.º 12
0
 def makeLocation(self,
                  CoordName,
                  Start=None,
                  End=None,
                  Strand=1,
                  ensembl_coord=False):
     """returns a Coordinate on this genome for the given values"""
     settings = dict(CoordName=CoordName,
                     Start=Start,
                     End=End,
                     Strand=Strand,
                     ensembl_coord=ensembl_coord)
     return Coordinate(self, **settings)
Exemplo n.º 13
0
 def test_init(self):
     """a Coordinate should expose the values it was constructed with"""
     human_loc = Coordinate(CoordName='x',
                            Start=1000,
                            End=10000,
                            Strand=-1,
                            genome=human)
     # TODO: complete test for platypus
     self.assertEqual(human_loc.CoordType, 'chromosome')
     self.assertEqual(human_loc.CoordName, 'x')
     self.assertEqual(human_loc.Start, 1000)
     self.assertEqual(human_loc.End, 10000)
     self.assertEqual(human_loc.Strand, -1)
     # the expected name had been masked to "H**o sapiens", which could
     # never equal the species name
     self.assertEqual(human_loc.Species, "Homo sapiens")
     # NOTE(review): hard-coded id, presumably tied to a specific Ensembl
     # release -- confirm against the release used by the test suite
     self.assertEqual(human_loc.seq_region_id, 131539)
Exemplo n.º 14
0
 def test_get_coord_conversion(self):
     """converting to contig level should give results whose first
     element lies within the query location"""
     # only the first element of each result is checked; the contig
     # coordinates themselves are not verified
     name, begin, finish, strand = '1', 1000, 1000000, 1
     query = Coordinate(genome=human,
                        CoordName=name,
                        Start=begin,
                        End=finish,
                        Strand=strand)
     for result in get_coord_conversion(query, 'contig', human.CoreDb):
         self.assertTrue(result[0].CoordName == name)
         self.assertTrue(result[0].Start >= begin)
         self.assertTrue(result[0].End <= finish)
         self.assertTrue(result[0].Strand == strand)
Exemplo n.º 15
0
    def _get_simple_features(self, db, klass, target_coord, query_coord,
                             where_feature):
        """yields a klass instance for each simple_feature record selected
        for query_coord. At present, only CpG islands are queried.

        Arguments:
            - db: database object providing the simple_feature table
            - klass: class used to wrap each record, called as
              klass(self, db, Location=..., Score=...)
            - target_coord: coordinate whose CoordType the location is
              converted to when its CoordName differs from query_coord's
            - query_coord: coordinate defining the seq_region and span queried
            - where_feature: passed to location_query as its where argument
        """
        table = db.getTable('simple_feature')
        wanted = ['CpGisland']
        analysis_ids = [self._feature_type_ids.get(name) for name in wanted]
        # NOTE: the original author flagged this query as needing a fix
        by_analysis = table.c.analysis_id.in_(analysis_ids)
        on_region = table.c.seq_region_id == query_coord.seq_region_id
        selection = sql.select([table], sql.and_(by_analysis, on_region))
        selection = location_query(table,
                                   query_coord.EnsemblStart,
                                   query_coord.EnsemblEnd,
                                   query=selection,
                                   where=where_feature)
        for row in selection.execute():
            location = Coordinate(self,
                                  CoordName=query_coord.CoordName,
                                  Start=row['seq_region_start'],
                                  End=row['seq_region_end'],
                                  seq_region_id=row['seq_region_id'],
                                  Strand=row['seq_region_strand'],
                                  ensembl_coord=True)
            if query_coord.CoordName != target_coord.CoordName:
                # exactly one conversion is expected; its second element
                # is the location used from here on
                converted = get_coord_conversion(location,
                                                 target_coord.CoordType,
                                                 self.CoreDb)
                location = asserted_one(converted)[1]

            # TODO: fix here if query_coord and target_coord have different
            # CoordName; making the location relative to query_coord and
            # target_coord (makeRelativeTo) is currently disabled
            yield klass(self, db, Location=location, Score=row['score'])