示例#1
0
 def __init__(self, genome, CoordName, Start, End, Strand = 1,
         CoordType = None, seq_region_id = None, ensembl_coord=False):
     """Create a coordinate on a named sequence region of genome.
     
     Arguments:
         - genome: object supplying the Species and CoreDb attributes
         - CoordName: name of the sequence region
         - Start, End: bounds of the span. When the region record is
           looked up, a falsy Start becomes 0 and a falsy End becomes the
           recorded region length
         - Strand: any value accepted by convert_strand
         - CoordType, seq_region_id: when CoordType is falsy, or none of
           seq_region_id/Start/End is truthy, both are looked up from
           genome.CoreDb using CoordName
         - ensembl_coord: if True, Start is decremented by 1; otherwise
           End is incremented by 1 when Start == End
     
     Raises AssertionError if Start > End and the strand is not -1.
     """
     if not CoordType or not (seq_region_id or Start or End):
         seq_region_data, CoordType = \
             _get_coord_type_and_seq_region_id(CoordName, genome.CoreDb)
         seq_region_id = seq_region_data['seq_region_id']
         Start = Start or 0
         End = End or seq_region_data['length']
     # TODO allow creation with just seq_region_id
     self.Species = genome.Species
     self.CoordType = DisplayString(CoordType, repr_length=4,
                                    with_quotes=False)
     self.CoordName = DisplayString(CoordName, repr_length=4,
                                    with_quotes=False)
     # if Start == End, we +1 to End, unless these are ensembl_coord's
     if ensembl_coord:
         Start -= 1
     elif Start == End:
         End += 1
     
     # normalise the strand before the orientation check, so that every
     # spelling of the minus strand convert_strand understands is accepted
     # for a reversed span, not only the literal -1
     Strand = convert_strand(Strand)
     if Start > End:
         # raised explicitly rather than via an assert statement so the
         # check is not stripped when python runs with -O
         if Strand != -1:
             raise AssertionError(
                 "strand incorrect for start[%s] > end[%s]" % (Start, End))
         Start, End = End, Start
     
     self.Start = Start
     self.End = End
     self.Strand = Strand
     self.seq_region_id = seq_region_id
     self.genome = genome
示例#2
0
    def getRegion(self,
                  region=None,
                  CoordName=None,
                  Start=None,
                  End=None,
                  Strand=None,
                  ensembl_coord=False):
        """returns a single generic region for the specified coordinates
        Arguments:
            - region: an object with a Location attribute (its Location is
              used), or a location used as given. When None, a Coordinate
              is built from CoordName, Start, End and Strand
            - ensembl_coord: if True, follows indexing system of Ensembl
              where indexing starts at 1"""
        # keyword arguments handed identically to both constructors
        shared = dict(CoordName=CoordName,
                      Start=Start,
                      End=End,
                      ensembl_coord=ensembl_coord)

        location = region
        if location is None:
            region_id = self._get_seq_region_id(CoordName)
            strand = convert_strand(Strand)
            location = Coordinate(self,
                                  Strand=strand,
                                  seq_region_id=region_id,
                                  **shared)
        elif hasattr(location, 'Location'):
            location = location.Location

        # GenericRegion receives the caller's Strand untouched; only the
        # Coordinate built above gets the converted value
        return GenericRegion(self,
                             self.CoreDb,
                             Strand=Strand,
                             Location=location,
                             **shared)
示例#3
0
 def __init__(self, genome, CoordName, Start, End, Strand = 1,
         CoordType = None, seq_region_id = None, ensembl_coord=False):
     """Create a coordinate on a named sequence region of genome.
     
     Arguments:
         - genome: object supplying the Species and CoreDb attributes
         - CoordName: name of the sequence region
         - Start, End: bounds of the span. When the region record is
           looked up, a falsy Start becomes 0 and a falsy End becomes the
           recorded region length
         - Strand: any value accepted by convert_strand
         - CoordType, seq_region_id: when CoordType is falsy, or none of
           seq_region_id/Start/End is truthy, both are looked up from
           genome.CoreDb using CoordName
         - ensembl_coord: if True, Start is decremented by 1; otherwise
           End is incremented by 1 when Start == End
     
     Raises AssertionError if Start > End and the strand is not -1.
     """
     if not CoordType or not (seq_region_id or Start or End):
         seq_region_data, CoordType = \
             _get_coord_type_and_seq_region_id(CoordName, genome.CoreDb)
         seq_region_id = seq_region_data['seq_region_id']
         Start = Start or 0
         End = End or seq_region_data['length']
     # TODO allow creation with just seq_region_id
     self.Species = genome.Species
     self.CoordType = DisplayString(CoordType, repr_length=4,
                                    with_quotes=False)
     self.CoordName = DisplayString(CoordName, repr_length=4,
                                    with_quotes=False)
     # if Start == End, we +1 to End, unless these are ensembl_coord's
     if ensembl_coord:
         Start -= 1
     elif Start == End:
         End += 1
     
     # normalise the strand before the orientation check, so that every
     # spelling of the minus strand convert_strand understands is accepted
     # for a reversed span, not only the literal -1
     Strand = convert_strand(Strand)
     if Start > End:
         # raised explicitly rather than via an assert statement so the
         # check is not stripped when python runs with -O
         if Strand != -1:
             raise AssertionError(
                 "strand incorrect for start[%s] > end[%s]" % (Start, End))
         Start, End = End, Start
     
     self.Start = Start
     self.End = End
     self.Strand = Strand
     self.seq_region_id = seq_region_id
     self.genome = genome
示例#4
0
    def getRegion(self, region=None, CoordName=None, Start=None, End=None, Strand=None, ensembl_coord=False):
        """returns a single generic region for the specified coordinates
        Arguments:
            - region: an object with a Location attribute (its Location is
              used), or a location used as given. When None, a Coordinate
              is built from CoordName, Start, End and Strand
            - ensembl_coord: if True, follows indexing system of Ensembl
              where indexing starts at 1"""
        if region is not None:
            # unwrap objects that carry their location as an attribute
            if hasattr(region, "Location"):
                region = region.Location
        else:
            region_id = self._get_seq_region_id(CoordName)
            numeric_strand = convert_strand(Strand)
            region = Coordinate(
                self,
                CoordName=CoordName,
                Start=Start,
                End=End,
                Strand=numeric_strand,
                seq_region_id=region_id,
                ensembl_coord=ensembl_coord,
            )

        # the caller's original Strand (not the converted one) is passed on
        core_db = self.CoreDb
        generic = GenericRegion(
            self,
            core_db,
            CoordName=CoordName,
            Start=Start,
            End=End,
            Strand=Strand,
            Location=region,
            ensembl_coord=ensembl_coord,
        )
        return generic
示例#5
0
    def getFeatures(self,
                    region=None,
                    feature_types=None,
                    where_feature=None,
                    CoordName=None,
                    Start=None,
                    End=None,
                    Strand=None,
                    ensembl_coord=False):
        """returns Region instances for the specified location

        This is a generator: nothing below runs, and no error is raised,
        until iteration starts.

        Arguments:
            - region: an object with a Location attribute (its Location is
              used), or a location used as given. When None, a Coordinate
              is built from CoordName, Start, End, Strand, ensembl_coord
            - feature_types: one feature type name or a series of names,
              lower-cased before use. Valid names are cpg, repeat, gene,
              est and variation. Must be supplied; the None default is
              not handled
            - where_feature: passed through to get_coord_conversion and to
              the per-type query method

        Raises RuntimeError if any requested feature type is not valid.
        """
        if isinstance(feature_types, str):
            feature_types = [feature_types]
        feature_types = [ft.lower() for ft in feature_types]
        feature_coord_levels = self._get_feature_coord_levels(feature_types)

        if region is None:
            seq_region_id = self._get_seq_region_id(CoordName)
            region = Coordinate(self,
                                CoordName=CoordName,
                                Start=Start,
                                End=End,
                                Strand=convert_strand(Strand),
                                seq_region_id=seq_region_id,
                                ensembl_coord=ensembl_coord)
        elif hasattr(region, 'Location'):
            region = region.Location

        coord = region
        # the coordinate system at which locations are to be referenced, and
        # the processing function
        target_coords_funcs = \
            dict(cpg = (self._get_simple_features, CpGisland),
                 repeat = (self._get_repeat_features, Repeat),
                 gene = (self._get_gene_features, Gene),
                 est = (self._get_gene_features, Est),
                 variation = (self._get_variation_features, Variation))

        known_types = set(target_coords_funcs)
        # set difference: report only the requested names that are invalid
        # (a symmetric difference would also list unrequested valid names)
        unknown_types = set(feature_types) - known_types
        if unknown_types:
            raise RuntimeError(
                'Unknown feature[%s], valid feature_types are: %s'
                % (unknown_types, known_types))

        for feature_type in feature_types:
            target_func, target_class = target_coords_funcs[feature_type]
            # est features are read from the other-features database
            db = self.CoreDb
            if feature_type == 'est':
                db = self.OtherFeaturesDb

            feature_coords = feature_coord_levels[feature_type].levels
            for feature_coord in feature_coords:
                chrom_other_coords = get_coord_conversion(coord,
                                                          feature_coord,
                                                          db,
                                                          where=where_feature)
                for chrom_coord, other_coord in chrom_other_coords:
                    # distinct loop name so the region parameter is not
                    # shadowed
                    for feature in target_func(db, target_class, chrom_coord,
                                               other_coord, where_feature):
                        yield feature
示例#6
0
 def test_strand_conversion(self):
     """should consistently convert strand info"""
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists in recent unittest releases
     self.assertEqual(convert_strand(None), 1)
     self.assertEqual(convert_strand(-1), -1)
     self.assertEqual(convert_strand(1), 1)
     self.assertEqual(convert_strand('-'), -1)
     self.assertEqual(convert_strand('+'), 1)
     self.assertEqual(convert_strand(-1.0), -1)
     self.assertEqual(convert_strand(1.0), 1)
示例#7
0
 def test_strand_conversion(self):
     """should consistently convert strand info"""
     # (input, expected) pairs: None, ints, symbols and floats
     cases = [(None, 1),
              (-1, -1),
              (1, 1),
              ('-', -1),
              ('+', 1),
              (-1.0, -1),
              (1.0, 1)]
     for raw, expected in cases:
         # assertEqual is used because the assertEquals alias is
         # deprecated and no longer exists in recent unittest releases
         self.assertEqual(convert_strand(raw), expected)
示例#8
0
文件: genome.py 项目: miklou/pycogent
 def getFeatures(self, region=None, feature_types=None, where_feature=None,
                 CoordName=None, Start=None, End=None, Strand=None,
                 ensembl_coord=False):
     """returns Region instances for the specified location
     
     This is a generator: nothing below runs, and no error is raised,
     until iteration starts.
     
     Arguments:
         - region: an object with a Location attribute (its Location is
           used), or a location used as given. When None, a Coordinate
           is built from CoordName, Start, End, Strand, ensembl_coord
         - feature_types: one feature type name or a series of names,
           lower-cased before use. Valid names are cpg, repeat, gene,
           est and variation. Must be supplied; the None default is
           not handled
         - where_feature: passed through to get_coord_conversion and to
           the per-type query method
     
     Raises RuntimeError if any requested feature type is not valid.
     """
     if isinstance(feature_types, str):
         feature_types = [feature_types]
     feature_types = [ft.lower() for ft in feature_types]
     feature_coord_levels = self._get_feature_coord_levels(feature_types)
     
     if region is None:
         seq_region_id = self._get_seq_region_id(CoordName)
         region = Coordinate(self,CoordName=CoordName, Start=Start,
                     End=End,
                     Strand = convert_strand(Strand),
                     seq_region_id=seq_region_id,
                     ensembl_coord=ensembl_coord)
     elif hasattr(region, 'Location'):
         region = region.Location
     
     coord = region
     # the coordinate system at which locations are to be referenced, and
     # the processing function
     target_coords_funcs = \
         dict(cpg = (self._get_simple_features, CpGisland),
              repeat = (self._get_repeat_features, Repeat),
              gene = (self._get_gene_features, Gene),
              est = (self._get_gene_features, Est),
              variation = (self._get_variation_features, Variation))
     
     known_types = set(target_coords_funcs)
     # set difference: report only the requested names that are invalid
     # (a symmetric difference would also list unrequested valid names)
     unknown_types = set(feature_types) - known_types
     if unknown_types:
         raise RuntimeError(
             'Unknown feature[%s], valid feature_types are: %s'
             % (unknown_types, known_types))
     
     for feature_type in feature_types:
         target_func, target_class = target_coords_funcs[feature_type]
         # est features are read from the other-features database
         db = self.CoreDb
         if feature_type == 'est':
             db = self.OtherFeaturesDb
         
         feature_coords = feature_coord_levels[feature_type].levels
         for feature_coord in feature_coords:
             chrom_other_coords = get_coord_conversion(coord, feature_coord,
                                         db, where=where_feature)
             for chrom_coord, other_coord in chrom_other_coords:
                 # distinct loop name so the region parameter is not
                 # shadowed
                 for feature in target_func(db, target_class, chrom_coord,
                                         other_coord, where_feature):
                     yield feature