Exemplo n.º 1
0
 def test_get_ensembl_format(self):
     """common and latin names should both resolve to the matching ensembl
     db prefix"""
     cases = [("human", "homo_sapiens"),
              ("mouse", "mus_musculus"),
              ("Mus musculus", "mus_musculus")]
     for query, prefix in cases:
         self.assertEqual(Species.getEnsemblDbPrefix(query), prefix)
Exemplo n.º 2
0
 def test_get_name_type(self):
     """should return the (latin|common) name given a latin, common or
     ensembl db prefix name"""
     # getSpeciesName is checked with a common name and an ensembl db prefix
     self.assertEqual(Species.getSpeciesName("human"), "Homo sapiens")
     self.assertEqual(Species.getSpeciesName("homo_sapiens"), "Homo sapiens")
     # getCommonName is checked with a latin name and an ensembl db prefix
     self.assertEqual(Species.getCommonName("Mus musculus"), "Mouse")
     self.assertEqual(Species.getCommonName("mus_musculus"), "Mouse")
Exemplo n.º 3
0
 def test_get_ensembl_format(self):
     """the ensembl db prefix should be returned for either a common or a
     latin name"""
     get_prefix = Species.getEnsemblDbPrefix
     self.assertEqual(get_prefix("human"), "homo_sapiens")
     self.assertEqual(get_prefix("mouse"), "mus_musculus")
     self.assertEqual(get_prefix("Mus musculus"), "mus_musculus")
Exemplo n.º 4
0
 def test_get_name_type(self):
     """should return the (latin|common) name given a latin, common or
     ensembl db prefix name"""
     # getSpeciesName is checked with a common name and an ensembl db prefix
     self.assertEqual(Species.getSpeciesName("human"), "Homo sapiens")
     self.assertEqual(Species.getSpeciesName("homo_sapiens"),
                      "Homo sapiens")
     # getCommonName is checked with a latin name and an ensembl db prefix
     self.assertEqual(Species.getCommonName("Mus musculus"), "Mouse")
     self.assertEqual(Species.getCommonName("mus_musculus"), "Mouse")
Exemplo n.º 5
0
 def test_add_new_species(self):
     """a newly added species/common name pair should be retrievable by
     either name, and the ensembl prefix should be inferred"""
     latin = "Otolemur garnettii"
     common = "Bushbaby"
     prefix = "otolemur_garnettii"
     Species.amendSpecies(latin, common)
     for query in (latin, "Bushbaby", common):
         self.assertEqual(Species.getSpeciesName(query), latin)
     for query in (latin, "Bushbaby"):
         self.assertEqual(Species.getCommonName(query), common)
     for query in ("Bushbaby", latin, common):
         self.assertEqual(Species.getEnsemblDbPrefix(query), prefix)
Exemplo n.º 6
0
    def __init__(self, Species, Release, account=None, pool_recycle=None):
        """record the species, release and account used by this genome

        Arguments:
            - Species: a species name, resolved via _Species.getSpeciesName
            - Release: the release, asserted to be truthy; stored as a string
            - account: when None, obtained from get_ensembl_account
            - pool_recycle: stored unchanged as self._pool_recycle
        """
        super(Genome, self).__init__()

        assert Release, 'invalid release specified'
        if account is None:
            account = get_ensembl_account(release=Release)
        self._account, self._pool_recycle = account, pool_recycle

        # TODO: check Release may not be necessary because: assert Release above
        if Release is None:
            Release = get_latest_release(account=account)

        self._gen_release = None

        # TODO make name and release immutable properties
        self.Species = _Species.getSpeciesName(Species)
        self.Release = str(Release)

        # the db connections start out unset
        self._core_db = self._var_db = self._other_db = None
        self._feature_type_ids = FeatureTypeCache(self)
        self._feature_coord_levels = FeatureCoordLevels(self.Species)
Exemplo n.º 7
0
 def __init__(self, Species, Release, account=None, pool_recycle=None):
     """store the species, release and account details for this genome

     Arguments:
         - Species: a species name, resolved via _Species.getSpeciesName
         - Release: the release, asserted to be truthy; stored as a string
         - account: when None, obtained from get_ensembl_account
         - pool_recycle: stored unchanged as self._pool_recycle
     """
     super(Genome, self).__init__()

     assert Release, 'invalid release specified'
     if account is None:
         account = get_ensembl_account(release=Release)
     self._account, self._pool_recycle = account, pool_recycle

     # TODO: check Release may not be necessary because: assert Release above
     if Release is None:
         Release = get_latest_release(account=account)

     self._gen_release = None

     # TODO make name and release immutable properties
     self.Species = _Species.getSpeciesName(Species)
     self.Release = str(Release)

     # the db connections start out unset
     self._core_db = self._var_db = self._other_db = None
     self._feature_type_ids = FeatureTypeCache(self)
     self._feature_coord_levels = FeatureCoordLevels(self.Species)
Exemplo n.º 8
0
 def __repr__(self):
     """class name, common species name, coord type, coord name, Start-End
     and strand, with all single quotes removed"""
     fields = (self.__class__.__name__,
               _Species.getCommonName(self.Species),
               self.CoordType, self.CoordName,
               self.Start, self.End, self.Strand)
     text = '%s(%r,%r,%r,%d-%d,%d)' % fields
     return text.replace("'", "")
Exemplo n.º 9
0
 def __repr__(self):
     """formats the class name, common species name and coordinates, then
     strips every single quote from the result"""
     cls_name = self.__class__.__name__
     common = _Species.getCommonName(self.Species)
     template = '%s(%r,%r,%r,%d-%d,%d)'
     values = (cls_name, common, self.CoordType, self.CoordName,
               self.Start, self.End, self.Strand)
     return (template % values).replace("'", "")
Exemplo n.º 10
0
 def _attach_genomes(self):
     """create a Genome for each entry of self.Species, recording it in
     self._genomes and as an attribute named by its compara name"""
     for sp_name in self.Species:
         compara_name = _Species.getComparaName(sp_name)
         sp_genome = Genome(Species=sp_name, Release=self.Release,
                            account=self._account)
         self._genomes[sp_name] = sp_genome
         setattr(self, compara_name, sp_genome)
Exemplo n.º 11
0
 def test_add_new_species(self):
     """adding a species/common name pair should make both names resolvable
     and yield the inferred ensembl prefix"""
     latin, common = "Otolemur garnettii", "Bushbaby"
     expected_prefix = "otolemur_garnettii"
     Species.amendSpecies(latin, common)

     get_latin = Species.getSpeciesName
     self.assertEqual(get_latin(latin), latin)
     self.assertEqual(get_latin("Bushbaby"), latin)
     self.assertEqual(get_latin(common), latin)

     get_common = Species.getCommonName
     self.assertEqual(get_common(latin), common)
     self.assertEqual(get_common("Bushbaby"), common)

     get_prefix = Species.getEnsemblDbPrefix
     self.assertEqual(get_prefix("Bushbaby"), expected_prefix)
     self.assertEqual(get_prefix(latin), expected_prefix)
     self.assertEqual(get_prefix(common), expected_prefix)
Exemplo n.º 12
0
 def _attach_genomes(self):
     """build one Genome per species in self.Species; each is stored in
     self._genomes keyed by species and set as an attribute under the
     species' compara name"""
     for member in self.Species:
         attribute = _Species.getComparaName(member)
         member_genome = Genome(Species=member, Release=self.Release,
                                account=self._account)
         self._genomes[member] = member_genome
         setattr(self, attribute, member_genome)
Exemplo n.º 13
0
 def __call__(self,
              species=None,
              core_db=None,
              feature_types=None,
              var_db=None,
              otherfeature_db=None):
     """set, then return, the feature levels recorded for a species

     The species name is taken from core_db.db_name.Species when that is
     truthy, otherwise from the species argument. When 'variation' is in
     feature_types, var_db must not be None.

     NOTE(review): feature_types and core_db default to None but are both
     used unconditionally, so callers have to supply them.
     """
     wants_variation = 'variation' in feature_types
     if wants_variation:
         assert var_db is not None
     name_source = core_db.db_name.Species or species
     species = _Species.getSpeciesName(name_source)
     self._set_species_feature_levels(species, core_db, feature_types,
                                      var_db, otherfeature_db)
     return self._species_feature_levels[species]
Exemplo n.º 14
0
def make_db_name_pattern(species=None, db_type=None, release=None):
    """returns a single-quoted pattern for matching the db name against

    Each truthy argument contributes a '%'-prefixed piece, in the order
    species (as its ensembl db prefix), db_type, release; a closing '%' is
    appended. At least one argument must be truthy."""
    wildcard = r"%"
    pieces = []
    if species:
        pieces.append(Species.getEnsemblDbPrefix(species))
    if db_type:
        pieces.append(db_type)
    if release:
        pieces.append(release)
    pattern = "".join(["%s%s" % (wildcard, piece) for piece in pieces])
    assert pattern

    return "'%s%s'" % (pattern, wildcard)
Exemplo n.º 15
0
def make_db_name_pattern(species=None, db_type=None, release=None):
    """returns a single-quoted, '%'-wildcarded pattern for matching db names

    Truthy arguments are added in the order species (converted to its
    ensembl db prefix), db_type, release, each preceded by '%'. The result
    ends with a further '%'. At least one argument must be truthy."""
    wildcard = r"%"
    pattern = ""
    if species:
        pattern = wildcard + "%s" % Species.getEnsemblDbPrefix(species)
    for extra in (db_type, release):
        if extra:
            pattern = "%s%s%s" % (pattern, wildcard, extra)
    assert pattern

    return "'%s%s'" % (pattern, wildcard)
Exemplo n.º 16
0
    def __init__(self, species, Release, account=None, pool_recycle=None, division=None):
        """record release/account details and attach a genome per species

        Arguments:
            - species: an iterable of species names; duplicates are dropped
              and the resolved names are stored sorted in self.Species
            - Release: the release, asserted to be truthy; stored as a string
            - account: when None, obtained from get_ensembl_account
            - pool_recycle, division: stored unchanged on the instance
        """
        assert Release, "invalid release specified"
        self.Release = str(Release)
        if account is None:
            account = get_ensembl_account(release=Release)
        self._account, self._pool_recycle = account, pool_recycle
        self._compara_db = None

        resolved = [_Species.getSpeciesName(name) for name in set(species)]
        resolved.sort()
        self.Species = tuple(resolved)

        # genomes are attached before the remaining attributes are set
        self._genomes = {}
        self._attach_genomes()

        self._species_id_map = self._species_db_map = None
        self._species_set = self._method_species_link = None
        self.division = division
Exemplo n.º 17
0
 def __call__(self, coord_type = None, core_db = None, species = None,
              seq_level=False):
     """coord_type can be coord_type or coord_system_id

     Returns the seq-level system when seq_level is true, else the entry
     for coord_type when that is truthy, else everything recorded for the
     species."""
     # TODO should only pass in core_db here, not that and Species, or just
     # the genome - what if someone wants to compare different ensembl
     # releases? keying by species is then a bad idea! better to key by
     # id(object)
     # change identifier to coord_system, handle either string val or int
     # (see MySQL table) as is this shouldn't be a __call__, see line 168
     # for reason why we should have a method to set data: setSpeciesCoord
     # call then just returns the coords for the named species
     species = _Species.getSpeciesName(species or core_db.db_name.Species)
     self._set_species_system(core_db, species)
     if seq_level:
         return self._get_seq_level_system(species)
     systems = self._species_coord_systems[species]
     if coord_type:
         return systems[coord_type]
     return systems
Exemplo n.º 18
0
 def __call__(self, coord_type = None, core_db = None, species = None,
              seq_level=False):
     """coord_type can be coord_type or coord_system_id

     The species argument is used when truthy, otherwise the name comes
     from core_db.db_name.Species. seq_level takes priority over
     coord_type; with neither, all coord systems for the species are
     returned."""
     # TODO should only pass in core_db here, not that and Species, or just
     # the genome - what if someone wants to compare different ensembl
     # releases? keying by species is then a bad idea! better to key by
     # id(object)
     # change identifier to coord_system, handle either string val or int
     # (see MySQL table) as is this shouldn't be a __call__, see line 168
     # for reason why we should have a method to set data: setSpeciesCoord
     # call then just returns the coords for the named species
     name = species or core_db.db_name.Species
     species = _Species.getSpeciesName(name)
     self._set_species_system(core_db, species)
     if seq_level:
         return self._get_seq_level_system(species)
     if coord_type:
         return self._species_coord_systems[species][coord_type]
     return self._species_coord_systems[species]
Exemplo n.º 19
0
 def _get_dnafrag_id_for_coord(self, coord):
     """returns the dnafrag_id for the coordinate

     Arguments:
         - coord: provides genome.Species and CoordName for the lookup

     Raises RuntimeError when the single-record lookup raises NoItemError.
     """
     dnafrag_table = self.ComparaDb.getTable('dnafrag')
     genome_db_table = self.ComparaDb.getTable('genome_db')

     # the value matched against genome_db.name depends on the release:
     # the lower-cased species name up to release 58, the Ensembl db
     # prefix for later releases
     prefix = coord.genome.Species.lower()
     if int(self.Release) > 58:
         prefix = _Species.getEnsemblDbPrefix(prefix)

     condition = sql.and_(
         dnafrag_table.c.genome_db_id == genome_db_table.c.genome_db_id,
         genome_db_table.c.name == prefix,
         dnafrag_table.c.name == coord.CoordName)
     query = sql.select([dnafrag_table.c.dnafrag_id,
                         dnafrag_table.c.coord_system_name],
                        condition)
     try:
         record = asserted_one(query.execute().fetchall())
     except NoItemError:
         # call form of raise is valid on both Python 2 and Python 3
         raise RuntimeError('No DNA fragment identified')
     return record['dnafrag_id']
Exemplo n.º 20
0
    def _get_dnafrag_id_for_coord(self, coord):
        """returns the dnafrag_id for the coordinate

        Arguments:
            - coord: provides genome.Species and CoordName for the lookup

        Raises RuntimeError when the single-record lookup raises NoItemError.
        """
        dnafrag_table = self.ComparaDb.getTable('dnafrag')
        genome_db_table = self.ComparaDb.getTable('genome_db')

        # the value matched against genome_db.name depends on the release:
        # the lower-cased species name up to release 58, the Ensembl db
        # prefix for later releases
        prefix = coord.genome.Species.lower()
        if int(self.Release) > 58:
            prefix = _Species.getEnsemblDbPrefix(prefix)

        condition = sql.and_(
            dnafrag_table.c.genome_db_id == genome_db_table.c.genome_db_id,
            genome_db_table.c.name == prefix,
            dnafrag_table.c.name == coord.CoordName)
        query = sql.select([dnafrag_table.c.dnafrag_id,
                            dnafrag_table.c.coord_system_name],
                           condition)
        try:
            record = asserted_one(query.execute().fetchall())
        except NoItemError:
            # call form of raise is valid on both Python 2 and Python 3
            raise RuntimeError('No DNA fragment identified')
        return record['dnafrag_id']
Exemplo n.º 21
0
    def __init__(self,
                 species,
                 Release,
                 account=None,
                 pool_recycle=None,
                 division=None):
        """store release/account details and attach a genome per species

        Arguments:
            - species: an iterable of species names; duplicates are dropped
              and the resolved names are stored sorted in self.Species
            - Release: the release, asserted to be truthy; stored as a string
            - account: when None, obtained from get_ensembl_account
            - pool_recycle, division: stored unchanged on the instance
        """
        assert Release, 'invalid release specified'
        self.Release = str(Release)
        if account is None:
            account = get_ensembl_account(release=Release)
        self._account, self._pool_recycle = account, pool_recycle
        self._compara_db = None

        unique_names = set(species)
        self.Species = tuple(
            sorted([_Species.getSpeciesName(name) for name in unique_names]))

        # genomes are attached before the remaining attributes are set
        self._genomes = {}
        self._attach_genomes()

        self._species_id_map = self._species_db_map = None
        self._species_set = self._method_species_link = None
        self.division = division
Exemplo n.º 22
0
 def __init__(self, species):
     """store the name _Species.getSpeciesName gives for species"""
     resolved_name = _Species.getSpeciesName(species)
     self.Species = resolved_name
Exemplo n.º 23
0
 def test_get_compara_name(self):
     """compara names should be valid for assignment onto objects"""
     cases = [('pika', 'Pika'),
              ('C.elegans', 'Celegans'),
              ('Caenorhabditis elegans', 'Celegans')]
     for query, compara_name in cases:
         self.assertEqual(Species.getComparaName(query), compara_name)
Exemplo n.º 24
0
 def test_amend_existing(self):
     """amending an existing species should replace its common name"""
     latin = 'Ochotona princeps'
     first_common = 'american pika'
     second_common = 'pika'
     prefix = 'ochotona_princeps'

     Species.amendSpecies(latin, first_common)
     self.assertEqual(Species.getCommonName(latin), first_common)

     Species.amendSpecies(latin, second_common)
     for query in (second_common, prefix):
         self.assertEqual(Species.getSpeciesName(query), latin)
     for query in (latin, prefix):
         self.assertEqual(Species.getCommonName(query), second_common)
     for query in (latin, second_common):
         self.assertEqual(Species.getEnsemblDbPrefix(query), prefix)
Exemplo n.º 25
0
 def test_amend_existing(self):
     """a second amendSpecies call should supersede the first common name"""
     latin, prefix = 'Ochotona princeps', 'ochotona_princeps'
     old_common, new_common = 'american pika', 'pika'

     Species.amendSpecies(latin, old_common)
     self.assertEqual(Species.getCommonName(latin), old_common)

     Species.amendSpecies(latin, new_common)
     get_latin = Species.getSpeciesName
     self.assertEqual(get_latin(new_common), latin)
     self.assertEqual(get_latin(prefix), latin)
     get_common = Species.getCommonName
     self.assertEqual(get_common(latin), new_common)
     self.assertEqual(get_common(prefix), new_common)
     get_prefix = Species.getEnsemblDbPrefix
     self.assertEqual(get_prefix(latin), prefix)
     self.assertEqual(get_prefix(new_common), prefix)
Exemplo n.º 26
0
 def test_get_compara_name(self):
     """names formed for assignment onto objects should be valid"""
     compara_name = Species.getComparaName
     self.assertEqual(compara_name('pika'), 'Pika')
     self.assertEqual(compara_name('C.elegans'), 'Celegans')
     self.assertEqual(compara_name('Caenorhabditis elegans'), 'Celegans')
Exemplo n.º 27
0
 def getSyntenicRegions(self, Species=None, CoordName=None, Start=None,
         End=None, Strand=1, ensembl_coord=False, region=None,
         align_method=None, align_clade=None, method_clade_id=None):
     """yields a SyntenicRegions instance for each genomic align block
     found for the reference region

     This is a generator: the argument checks and queries below only run
     once iteration starts.

     Arguments:
         - Species: the species name
         - CoordName, Start, End, Strand: the coordinates for the region
         - ensembl_coord: whether the coordinates are in Ensembl form
         - region: a region instance or a location, in which case the
           CoordName etc .. arguments are ignored
         - align_method, align_clade: the alignment method and clade to use
           Note: the options for this instance can be found by printing
           the method_species_links attribute of this object.
         - method_clade_id: over-rides align_method/align_clade. The entry
           in method_species_links under method_link_species_set_id

     Raises:
         - AssertionError if neither (align_method and align_clade) nor
           method_clade_id is given, or if no reference location could be
           made for a block
         - RuntimeError if no row of method_species_links matches
           align_method and align_clade
     """
     assert (align_method and align_clade) or method_clade_id, \
             'Must specify (align_method & align_clade) or method_clade_id'
     if method_clade_id is None:
         # case-insensitive substring match; there is no break, so the
         # last matching row determines the id
         for row in self.method_species_links:
             if align_method.lower() in row['align_method'].lower() and\
                     align_clade.lower() in row['align_clade'].lower():
                 method_clade_id = row['method_link_species_set_id']

     if method_clade_id is None:
         # call form of raise is valid on both Python 2 and Python 3
         raise RuntimeError("Invalid align_method[%s] or align_clade "
                            "specified[%s]" % (align_method, align_clade))

     if region is None:
         ref_genome = self._genomes[_Species.getSpeciesName(Species)]
         region = ref_genome.makeLocation(CoordName=CoordName,
                             Start=Start, End=End, Strand=Strand,
                             ensembl_coord=ensembl_coord)
     elif hasattr(region, 'Location'):
         region = region.Location

     # make sure the genome instances match
     ref_genome = self._genomes[region.genome.Species]
     if ref_genome is not region.genome:
         # recreate region from our instance
         region = ref_genome.makeLocation(CoordName=region.CoordName,
                             Start=region.Start, End=region.End,
                             Strand=region.Strand)

     ref_dnafrag_id = self._get_dnafrag_id_for_coord(region)
     blocks = self._get_genomic_align_blocks_for_dna_frag_id(
                             method_clade_id, ref_dnafrag_id, region)
     for block in blocks:
         genomic_align_block_id = block['genomic_align_block_id']
         # get the joint records for this genomic_align_block_id
         records = self._get_joint_genomic_align_dnafrag(
                                             genomic_align_block_id)
         members = []
         ref_location = None
         for record in records:
             taxon_id = self.genome_taxon[record.genome_db_id]
             genome = self.taxon_id_species[taxon_id]
             # we have a case where we get back different coordinate system
             # results for the ref genome. We keep only those that match
             # the CoordName of region

             if genome is region.genome and \
                     record.name == region.CoordName:
                 # this is the ref species and we adjust the ref_location
                 # for this block: only a positive start difference and
                 # only a non-positive end difference are applied
                 diff_start = record.dnafrag_start - region.EnsemblStart
                 shift_start = max(diff_start, 0)
                 diff_end = record.dnafrag_end - region.EnsemblEnd
                 shift_end = min(diff_end, 0)
                 try:
                     ref_location = region.resized(shift_start, shift_end)
                 except ValueError:
                     # we've hit some ref genome fragment that matches
                     # but whose coordinates aren't right
                     continue
             elif genome is region.genome:
                 continue
             members.append((genome, record))
         assert ref_location is not None, "Failed to make the reference"\
                                                         " location"
         yield SyntenicRegions(self, members, ref_location=ref_location)
Exemplo n.º 28
0
 def __call__(self, species = None, core_db=None, feature_types=None, var_db=None, otherfeature_db=None):
     """set, then return, the feature levels recorded for a species

     core_db.db_name.Species is used as the species name when truthy,
     otherwise the species argument. var_db must not be None when
     'variation' is in feature_types.

     NOTE(review): feature_types and core_db default to None but are both
     used unconditionally, so callers have to supply them.
     """
     if 'variation' in feature_types:
         assert var_db is not None
     db_species = core_db.db_name.Species
     species = _Species.getSpeciesName(db_species or species)
     level_args = (species, core_db, feature_types, var_db, otherfeature_db)
     self._set_species_feature_levels(*level_args)
     return self._species_feature_levels[species]
Exemplo n.º 29
0
 def __init__(self, species):
     """resolve species through _Species.getSpeciesName and keep the
     result as self.Species"""
     species_name = _Species.getSpeciesName(species)
     self.Species = species_name
Exemplo n.º 30
0
    def getSyntenicRegions(self,
                           Species=None,
                           CoordName=None,
                           Start=None,
                           End=None,
                           Strand=1,
                           ensembl_coord=False,
                           region=None,
                           align_method=None,
                           align_clade=None,
                           method_clade_id=None):
        """yields a SyntenicRegions instance for each genomic align block
        found for the reference region

        This is a generator: the argument checks and queries below only run
        once iteration starts.

        Arguments:
            - Species: the species name
            - CoordName, Start, End, Strand: the coordinates for the region
            - ensembl_coord: whether the coordinates are in Ensembl form
            - region: a region instance or a location, in which case the
              CoordName etc .. arguments are ignored
            - align_method, align_clade: the alignment method and clade to use
              Note: the options for this instance can be found by printing
              the method_species_links attribute of this object.
            - method_clade_id: over-rides align_method/align_clade. The entry
              in method_species_links under method_link_species_set_id

        Raises:
            - AssertionError if neither (align_method and align_clade) nor
              method_clade_id is given, or if no reference location could be
              made for a block
            - RuntimeError if no row of method_species_links matches
              align_method and align_clade
        """
        assert (align_method and align_clade) or method_clade_id, \
                'Must specify (align_method & align_clade) or method_clade_id'
        if method_clade_id is None:
            # case-insensitive substring match; there is no break, so the
            # last matching row determines the id
            for row in self.method_species_links:
                if align_method.lower() in row['align_method'].lower() and\
                        align_clade.lower() in row['align_clade'].lower():
                    method_clade_id = row['method_link_species_set_id']

        if method_clade_id is None:
            # call form of raise is valid on both Python 2 and Python 3
            raise RuntimeError("Invalid align_method[%s] or align_clade "
                               "specified[%s]" % (align_method, align_clade))

        if region is None:
            ref_genome = self._genomes[_Species.getSpeciesName(Species)]
            region = ref_genome.makeLocation(CoordName=CoordName,
                                             Start=Start,
                                             End=End,
                                             Strand=Strand,
                                             ensembl_coord=ensembl_coord)
        elif hasattr(region, 'Location'):
            region = region.Location

        # make sure the genome instances match
        ref_genome = self._genomes[region.genome.Species]
        if ref_genome is not region.genome:
            # recreate region from our instance
            region = ref_genome.makeLocation(CoordName=region.CoordName,
                                             Start=region.Start,
                                             End=region.End,
                                             Strand=region.Strand)

        ref_dnafrag_id = self._get_dnafrag_id_for_coord(region)
        blocks = self._get_genomic_align_blocks_for_dna_frag_id(
            method_clade_id, ref_dnafrag_id, region)
        for block in blocks:
            genomic_align_block_id = block['genomic_align_block_id']
            # get the joint records for this genomic_align_block_id
            records = self._get_joint_genomic_align_dnafrag(
                genomic_align_block_id)
            members = []
            ref_location = None
            for record in records:
                taxon_id = self.genome_taxon[record.genome_db_id]
                genome = self.taxon_id_species[taxon_id]
                # we have a case where we get back different coordinate system
                # results for the ref genome. We keep only those that match
                # the CoordName of region

                if genome is region.genome and \
                        record.name == region.CoordName:
                    # this is the ref species and we adjust the ref_location
                    # for this block: only a positive start difference and
                    # only a non-positive end difference are applied
                    diff_start = record.dnafrag_start - region.EnsemblStart
                    shift_start = max(diff_start, 0)
                    diff_end = record.dnafrag_end - region.EnsemblEnd
                    shift_end = min(diff_end, 0)
                    try:
                        ref_location = region.resized(shift_start, shift_end)
                    except ValueError:
                        # we've hit some ref genome fragment that matches
                        # but whose coordinates aren't right
                        continue
                elif genome is region.genome:
                    continue
                members.append((genome, record))
            assert ref_location is not None, "Failed to make the reference"\
                                                            " location"
            yield SyntenicRegions(self, members, ref_location=ref_location)