示例#1
0
    def homologene_uniprot_dict(self, source):
        """
        Builds the orthology translation table as a dict from UniProt IDs
        of the source organism to UniProt IDs of the target organism,
        obtained from NCBI HomoloGene data. Uses RefSeq and Entrez IDs as
        intermediate identifiers for the translation.

        :param source:
            The source organism; it is passed through ``self.get_source``
            before use.

        The table is stored in ``self.homo[source]``: each UniProt ID of
        the source proteome is mapped to a sorted list of target UniProt
        IDs (empty if nothing could be translated). Any table previously
        stored for this source is discarded. Nothing is returned.
        """

        source = self.get_source(source)

        # start from an empty table for this source organism
        self.homo[source] = {}

        # HomoloGene tables keyed by Entrez and by RefSeq IDs
        hge = dataio.homologene_dict(source, self.target, 'entrez')
        hgr = dataio.homologene_dict(source, self.target, 'refseq')

        self.load_proteome(source, self.only_swissprot)

        for u in self._proteomes[(source, self.only_swissprot)]:

            # intermediate IDs of the source protein
            source_e = self.mapper.map_name(u, 'uniprot', 'entrez', source)
            source_r = self.mapper.map_name(u, 'uniprot', 'refseqp', source)
            target_u = set()
            target_r = set()
            target_e = set()

            # source intermediate IDs -> target intermediate IDs
            for e in source_e:
                if e in hge:
                    target_e.update(hge[e])

            for r in source_r:
                if r in hgr:
                    target_r.update(hgr[r])

            # target intermediate IDs -> target UniProt IDs
            for e in target_e:
                target_u.update(
                    self.mapper.map_name(
                        e, 'entrez', 'uniprot', self.target))

            for r in target_r:
                # translate the RefSeq ID `r` itself; using the leftover
                # Entrez loop variable here would drop all RefSeq based
                # orthologues (or fail if no Entrez ID was seen before)
                target_u.update(
                    self.mapper.map_name(
                        r, 'refseqp', 'uniprot', self.target))

            # final UniProt to UniProt mapping of every target ID;
            # duplicates arising in this step are not removed
            target_u = itertools.chain.from_iterable(
                self.mapper.map_name(
                    tu, 'uniprot', 'uniprot', self.target)
                for tu in target_u
            )

            self.homo[source][u] = sorted(target_u)
示例#2
0
 def test_homologene_dict(self):
     """
     Checks that the HomoloGene table built by ``dataio.homologene_dict``
     for organisms 9606 and 10090 with ``'GeneSymbol'`` identifiers
     lists ``Stard10`` among the entries of ``STARD10``.
     """

     symbol_table = dataio.homologene_dict(9606, 10090, 'GeneSymbol')
     stard10_orthologs = symbol_table['STARD10']

     assert 'Stard10' in stard10_orthologs