Exemplo n.º 1
0
    def make_df(self, tax_id = False):
        """
        Build the ``df`` attribute, a ``pandas.DataFrame`` with one row for
        each item obtained by iterating over this object.

        Every row comes from the item's ``get_line()`` method. Two further
        columns, ``enzyme_genesymbol`` and ``substrate_genesymbol``, hold the
        result of ``mapping.map_name0`` for the values of the ``enzyme`` and
        ``substrate`` columns (UniProt to gene symbol, using
        ``self.ncbi_tax_id``); an empty string is stored whenever that
        result is falsy. These two columns are placed right after
        ``substrate``.

        :param bool tax_id:
            If true, append an ``ncbi_tax_id`` column with
            ``self.ncbi_tax_id`` in every row.
        """

        base_columns = [
            'enzyme',
            'substrate',
            'isoforms',
            'residue_type',
            'residue_offset',
            'modification',
            'sources',
            'references',
        ]

        # `sources` and `references` keep the dtype pandas infers for them
        column_dtypes = dict(
            enzyme = 'category',
            substrate = 'category',
            isoforms = 'category',
            residue_type = 'category',
            residue_offset = 'int32',
            modification = 'category',
        )

        records = [item.get_line() for item in self]

        self.df = pd.DataFrame(records, columns = base_columns)
        self.df = self.df.astype(column_dtypes)

        symbol_columns = (
            ('enzyme', 'enzyme_genesymbol'),
            ('substrate', 'substrate_genesymbol'),
        )

        for id_column, symbol_column in symbol_columns:

            symbols = []

            for uniprot in self.df[id_column]:

                symbol = mapping.map_name0(
                    uniprot,
                    id_type = 'uniprot',
                    target_id_type = 'genesymbol',
                    ncbi_tax_id = self.ncbi_tax_id,
                )
                symbols.append(symbol or '')

            self.df[symbol_column] = pd.Series(symbols)

        # the gene symbol columns go right behind the two identifier columns
        ordered_columns = (
            base_columns[:2] +
            [symbol_column for _, symbol_column in symbol_columns] +
            base_columns[2:]
        )

        self.df = self.df.loc[:, ordered_columns]

        if tax_id:

            self.df['ncbi_tax_id'] = [self.ncbi_tax_id] * self.df.shape[0]
Exemplo n.º 2
0
    def build_gene(self):
        """
        Fill the ``gene`` attribute with ``CellPhoneDBGene`` records.

        Each element of ``self._entities`` is processed; if it has a
        ``components`` attribute, its components are processed instead of
        the element itself. One record is created for every identifier that
        ``mapping.map_name`` returns for the UniProt to Ensembl translation.
        The value from ``mapping.map_name0`` (UniProt to gene symbol) is used
        both as ``gene_name`` and as ``hgnc_symbol``.
        """

        self.gene = set()

        for entity in self._entities:

            # entities with `components` (complexes) contribute their
            # members; it is unclear whether this is needed, but it is
            # not expected to do any harm
            members = (
                entity.components
                if hasattr(entity, 'components') else
                (entity, )
            )

            for member in members:

                symbol = mapping.map_name0(member, 'uniprot', 'genesymbol')
                ensembl_ids = mapping.map_name(member, 'uniprot', 'ensembl')

                # nothing is added when there is no Ensembl identifier
                self.gene.update(
                    CellPhoneDBGene(
                        gene_name = symbol,
                        uniprot = member,
                        hgnc_symbol = symbol,
                        ensembl = ensembl_id,
                    )
                    for ensembl_id in ensembl_ids
                )
Exemplo n.º 3
0
    def stoichiometry_str_genesymbols(self):
        """
        Render the stoichiometry as a string of labels separated by ``;``.

        The items of ``self.components`` (identifier, count pairs) are
        visited in the sorted order of the identifiers. Each identifier
        contributes its label as many times as its count. The label is the
        value returned by ``mapping.map_name0`` for the UniProt to gene
        symbol translation, or the identifier itself when that value is
        falsy.

        :return:
            The labels joined by ``;``; an empty string if there are no
            components.
        """

        labels = []

        # `items()` is available on both Python 2 and 3 mappings, so the
        # `iteritems` compatibility helper is not needed here
        for uniprot, cnt in sorted(
            self.components.items(),
            key = lambda comp_cnt: comp_cnt[0],
        ):

            label = mapping.map_name0(
                uniprot,
                'uniprot',
                'genesymbol',
            ) or uniprot

            # repeat the label according to the count of the component
            labels.extend([label] * cnt)

        return ';'.join(labels)
Exemplo n.º 4
0
        def get_id_name(entity):
            """
            Return a pair of the string form of ``entity`` and a name for it.

            If the string form contains ``'COMPLEX'`` it also serves as the
            name; otherwise the name is the value returned by
            ``mapping.map_name0`` for the ``'uniprot'`` to
            ``'uniprot-entry'`` translation.
            """

            identifier = entity.__str__()

            # identifiers of complexes are used as their own names
            if 'COMPLEX' in identifier:

                return identifier, identifier

            entry_name = mapping.map_name0(
                identifier,
                'uniprot',
                'uniprot-entry',
            )

            return identifier, entry_name
Exemplo n.º 5
0
 def build_protein(self):
     """
     Fill the ``cpdb_protein`` attribute with ``CellPhoneDBProtein``
     records.

     Each element of ``self._entities`` is processed; if it has a
     ``components`` attribute, its components are processed instead of the
     element itself. The boolean fields are membership tests against the
     value returned by ``self.intercell.classes_by_entity`` and against
     the ``'Integrins'`` entry of ``annot.db.annots``. The description,
     highlight, ``other`` and tag fields are always empty strings.
     """

     integrins = annot.db.annots['Integrins']

     # fields for which this method always provides an empty string
     blank_fields = dict.fromkeys(
         (
             'secreted_desc',
             'secreted_highlight',
             'receptor_desc',
             'other',
             'other_desc',
             'tags',
             'tags_reason',
             'tags_description',
         ),
         '',
     )

     self.cpdb_protein = set()

     for entity in self._entities:

         # entities with `components` (complexes) contribute their
         # members; it is unclear whether this is needed, but it is
         # not expected to do any harm
         members = (
             entity.components
             if hasattr(entity, 'components') else
             (entity,)
         )

         for member in members:

             classes = self.intercell.classes_by_entity(member)

             record = CellPhoneDBProtein(
                 uniprot = member.__str__(),
                 protein_name = mapping.map_name0(
                     member,
                     'uniprot',
                     'uniprot-entry',
                 ),
                 transmembrane = 'transmembrane' in classes,
                 peripheral = 'cell_surface' in classes,
                 secreted = 'secreted' in classes,
                 receptor = 'receptor' in classes,
                 integrin = member in integrins,
                 **blank_fields
             )

             self.cpdb_protein.add(record)
Exemplo n.º 6
0
    def genesymbols(self):
        """
        Return a sorted list of labels, one for each key of
        ``self.components``.

        The label is the value returned by ``mapping.map_name0`` for the
        UniProt to gene symbol translation, or the key itself when that
        value is falsy.
        """

        labels = [
            mapping.map_name0(uniprot, 'uniprot', 'genesymbol') or uniprot
            for uniprot in self.components.keys()
        ]

        labels.sort()

        return labels