Пример #1
0
    def __init__(self, target, source=None, only_swissprot=True, mapper=None):
        """
        This class translates between homologous UniProt IDs of
        2 organisms based on NCBI HomoloGene data.
        Uses RefSeq and Entrez IDs for translation.

        The proteome of the target organism is loaded on construction.
        If a source organism is given, the homology dictionary for that
        organism is built right away as well.

        :param int target: NCBI Taxonomy ID of the organism
                           to be translated to.
        :param int source: NCBI Taxonomy ID of the default organism
                           to be translated from.
        :param bool only_swissprot: Whether only SwissProt or Trembl IDs
                                    should be used. Passed on to
                                    ``load_proteome``.
        :param pypath.mapping.Mapper mapper: A Mapper object. A new
                                             ``mapping.Mapper()`` is created
                                             if None.
        """

        # Storage for the homology data; starts out empty.
        self.homo = {}
        self.only_swissprot = only_swissprot
        self.target = target
        self.source = source
        self.set_default_source(source)
        self.mapper = mapping.Mapper() if mapper is None else mapper

        Proteomes.__init__(self)
        self.load_proteome(self.target, self.only_swissprot)

        if source is not None:
            self.homologene_uniprot_dict(source)
Пример #2
0
def get_reg_rels(pa, prot_list):
    """
    Collect signed regulatory relations among the given proteins.

    For every protein in ``prot_list`` its stimulators and inhibitors are
    queried from ``pa``. A relation is kept only if the regulator is a
    different member of ``prot_list`` and the connecting edge has at least
    two sources. Regulators reported as both stimulating and inhibiting
    are resolved through ``majority_sign()`` of the edge's ``dirs``
    attribute; if no sign is found for the pair, or the sign is still
    ambiguous, the relation is dropped.

    :param pa: Network object providing ``gs_stimulated_by``,
        ``gs_inhibited_by`` and ``get_edge``.
    :param prot_list: Re-iterable collection of hashable protein names.
        They are compared against gene symbols mapped from UniProt IDs,
        so presumably these are gene symbols -- verify against callers.
    :return: Set of ``(regulator, target, relation)`` tuples, where
        relation is ``"activates"`` or ``"inhibits"``.
    """
    reg_rels = set()
    m = mapping.Mapper(9606)
    # Built once so the membership test is O(1) per regulator.
    members = set(prot_list)

    def supported_edge(reg, prot):
        """Return the reg -> prot edge if it passes the filters, else None."""
        if reg == prot or reg not in members:
            return None
        edge = pa.get_edge(reg, prot)
        if len(edge["sources"]) < 2:
            return None
        return edge

    for prot in prot_list:
        pos_regs = set(pa.gs_stimulated_by(prot).gs())
        neg_regs = set(pa.gs_inhibited_by(prot).gs())
        ambig_regs = pos_regs & neg_regs

        unambiguous = (
            ("activates", pos_regs - ambig_regs),
            ("inhibits", neg_regs - ambig_regs),
        )
        for relation, regs in unambiguous:
            for reg in regs:
                if supported_edge(reg, prot) is not None:
                    reg_rels.add((reg, prot, relation))

        for reg in ambig_regs:
            edge = supported_edge(reg, prot)
            if edge is None:
                continue
            signs_dict = edge["dirs"].majority_sign()
            # Reset for every regulator: otherwise an unmatched pair would
            # raise NameError or silently reuse the previous pair's sign.
            sign = None
            for sign_pair in signs_dict:
                sign_reg = m.map_name(sign_pair[0], "uniprot", "genesymbol")[0]
                sign_sub = m.map_name(sign_pair[1], "uniprot", "genesymbol")[0]
                if sign_reg == reg and sign_sub == prot:
                    sign = signs_dict[sign_pair]
                    break
            if sign is None:
                print("crap", reg, prot)
                continue
            if sign[0] and sign[1]:
                # Majority vote is still ambiguous.
                continue
            if sign[0]:
                reg_rels.add((reg, prot, "activates"))
            elif sign[1]:
                reg_rels.add((reg, prot, "inhibits"))
    return reg_rels
Пример #3
0
    def __init__(self,
                 source,
                 reactants=None,
                 products=None,
                 references=None,
                 id_type='uniprot',
                 names_reactants=None,
                 names_products=None,
                 ptms_reactants=None,
                 ncbi_tax_id=9606,
                 ptms_products=None,
                 mapper=None,
                 seq=None):
        '''
        source : str
        Source database, e.g. `Reactome`.

        reactants : list
        List of reactants. List of string IDs or list of lists, if
        reactants are complexes. None is treated as an empty list.

        products : list
        List of products. List of string IDs or list of lists, if
        products are complexes. None is treated as an empty list.

        references : list
        Stored as-is on the instance. None is treated as an empty list.

        id_type : str
        Type of reactant and product IDs. Default is UniProt ID.

        names_reactants, names_products : list
        Names for the reactants and products, in the same order.
        Only used if given as a list.

        ncbi_tax_id : int
        Stored on the instance; 9606 by default.

        mapper : pypath.mapping.Mapper
        If no Mapper instance given, a new one will be initialized.

        `id_type`, `ptms_reactants`, `ptms_products` and `seq` are not
        used in the part of the constructor visible here.
        '''
        self.mapper = mapper if mapper is not None else mapping.Mapper()
        self.source = source
        # None instead of `[]` as default, so instances never share one list.
        self.references = [] if references is None else references
        self.ncbi_tax_id = ncbi_tax_id
        self.reactants = {}
        self.products = {}
        # Real lists, not generators: each one is iterated twice below (once
        # by zip, once by add_species), and a generator would arrive at
        # add_species already consumed.
        reactants = [
            ids if isinstance(ids, list) else [ids]
            for ids in (() if reactants is None else reactants)
        ]
        products = [
            ids if isinstance(ids, list) else [ids]
            for ids in (() if products is None else products)
        ]
        # NOTE(review): the fallbacks iterate self.reactants / self.products,
        # which are still empty dicts here, so no names are derived when the
        # caller passes none. Confirm whether the local lists were intended.
        if not isinstance(names_reactants, list):
            names_reactants = (
                self.species_name(ids) for ids in self.reactants)
        names_reactants = dict(
            zip((species_id(ids) for ids in reactants), names_reactants))
        if not isinstance(names_products, list):
            names_products = (
                self.species_name(ids) for ids in self.products)
        names_products = dict(
            zip((species_id(ids) for ids in products), names_products))
        self.add_species('reactants', reactants, names_reactants, source)
        self.add_species('products', products, names_products, source)
        if source == 'Reactome':
            # `.items()` works on both Python 2 and 3, unlike `.iteritems()`.
            for i, name in names_reactants.items():
                ptms = self.reactome_ptms(i, name)
Пример #4
0
 def __init__(self, user=None, mapper=None):
     """
     Log in to MSigDB and prepare empty containers for gene set data.

     :param user: MSigDB username. If None, the module-level global
         ``MSIGDB_USER`` is used when it exists.
     :param mapper: Mapper instance; a new ``mapping.Mapper()`` is
         created if None.

     Without a username only ``self.user`` is set and a hint is written
     to standard output.
     """
     if user is None:
         # Fall back to the module global; stays None if it is not defined.
         user = globals().get('MSIGDB_USER')
     self.user = user

     if self.user is None:
         sys.stdout.write('\t:: Please provide an MSigDB username by \n'
                          '``pypath.gsea.GSEA(user = \'\', ...)``, or by \n'
                          'setting ``MSIGDB_USER`` global variable.\n\n')
         sys.stdout.flush()
         return

     self.login()
     self.mapper = mapping.Mapper() if mapper is None else mapper
     self.info = {}
     self.groups = {}
     self.sets = {}
     self.collections = {}
     self.list_collections()
     self.ids = {'entrez': 'entrez', 'symbol': 'genesymbol'}
     self.target_id = 'uniprot'
Пример #5
0
 def __init__(self,
              chembl_mysql=(None, 'chembl_ebi'),
              ncbi_tax_id=9606,
              mapping_mysql=None,
              mapper=None):
     self.mysql = mysql.MysqlRunner(chembl_mysql)
     self.ncbi_tax_id = ncbi_tax_id
     if mapper.__class__.__name__ != 'Mapper':
         self.mapper = mapping.Mapper(ncbi_tax_id, mapping_mysql)
         # self.mapper.load_mappings(maps=data_formats.mapListUniprot)
     else:
         self.mapper = mapper
     self.chembl_uniprot_table()
     self.result = None
     # constant elements:
     self.mandatory_fields = set([
         'compound_chembl', 'target_uniprot', 'tax_id', 'target_type',
         'potential_duplicate'
     ])
     self.extra_fields = [
         'compound_names', 'action_type', 'target_domains',
         'predicted_binding_domains', 'activities', 'pchembl'
     ]
     self.set_group_concat_len = '''SET group_concat_max_len=18446744073709551615;'''
     self.group_concat_len_increased = False
     self.pbd_join = '''
     /* predicted binding domains */
     LEFT JOIN predicted_binding_domains AS pbd 
         ON ac.activity_id = pbd.activity_id 
     LEFT JOIN site_components AS sc 
         ON pbd.site_id = sc.site_id 
     LEFT JOIN domains AS pdm 
         ON sc.domain_id = pdm.domain_id '''
     self.pbd_select = ''',
     GROUP_CONCAT(
         DISTINCT(pdm.source_domain_id) 
         SEPARATOR ";") AS predicted_binding_domains'''
     self.dom_join = '''
     /* domains of the target */
     LEFT JOIN component_domains AS cd 
         ON tc.component_id = cd.component_id 
     LEFT JOIN domains AS tdm 
         ON cd.domain_id = tdm.domain_id '''
     self.dom_select = ''',
     GROUP_CONCAT(
         DISTINCT(tdm.source_domain_id) 
         SEPARATOR ";") AS target_domains'''
     self.atype_join = '''
     LEFT JOIN drug_mechanism AS dm 
         ON (dm.molregno = md.molregno AND dm.tid = td.tid)'''
     self.atype_select = ''',
         GROUP_CONCAT(DISTINCT(dm.action_type) SEPARATOR ';') AS action_type'''
     self.cprop_select = ',\n            CAST(cp.%s AS CHAR) AS %s'
     self.cprop_join = '''
     /* various properties of the compounds */
     LEFT JOIN compound_properties AS cp
         ON md.molregno = cp.molregno'''
     self.act_join = '''
     /* this is for activity values, if available */
     LEFT JOIN assays AS ay 
         ON dm.tid = ay.tid 
     LEFT JOIN activities AS ac 
         ON (ac.assay_id = ay.assay_id AND ac.molregno = dm.molregno)'''
     self.act_join = '''
     /* this is for activity values, if available */
     LEFT JOIN (
         SELECT 
             ay.tid,
             ac.standard_type,
             ac.standard_value,
             ac.standard_units,
             ac.pchembl_value,
             ac.molregno,
             ac.activity_id 
         FROM assays AS ay 
         INNER JOIN activities AS ac 
         ON ac.assay_id = ay.assay_id 
     ) AS ac ON (ac.molregno = dm.molregno AND ac.tid = dm.tid)'''
     self.act_select = ''',
         GROUP_CONCAT(DISTINCT(CONCAT(
                 ac.standard_type,'=',ac.standard_value,'=',ac.standard_units
             )) SEPARATOR ';') AS activities'''
     self.pchembl_select = ''',
         GROUP_CONCAT(DISTINCT(ac.pchembl_value) SEPARATOR ';') AS pchembl'''
     self.group_concat_len = '''SET group_concat_max_len=18446744073709551615;'''
     self.comp_syn = '''SELECT 
             ms.synonyms AS syn,
             md.chembl_id 
         FROM 
             molecule_synonyms AS ms 
         LEFT JOIN molecule_dictionary AS md 
             ON ms.molregno = md.molregno 
         WHERE ms.synonyms IN (%s) 
         GROUP BY 
             ms.synonyms, md.chembl_id 
         ORDER BY NULL;'''
     self.comp_rec = '''SELECT 
Пример #6
0
    def init_mapper(self):
        """
        Keep the current mapper if it is truthy, otherwise replace it
        with a new ``mapping.Mapper()``.
        """

        current = self.mapper

        self.mapper = current if current else mapping.Mapper()
Пример #7
0
    def init_mapper(self):
        """
        Create a default ``mapping.Mapper()`` unless a mapper is already set.
        """

        if self.mapper is not None:

            return

        self.mapper = mapping.Mapper()
Пример #8
0
    def set_mapper(self):
        """
        Create a ``mapping.Mapper`` for ``self.ncbi_tax_id`` unless a mapper
        is already set.
        """

        if self.mapper is not None:

            return

        self.mapper = mapping.Mapper(ncbi_tax_id=self.ncbi_tax_id)
Пример #9
0
# cogent and sqlalchemy modules need to be installed:
# pip2 install cogent
# pip2 install sqlalchemy
from cogent.db.ensembl import HostAccount, Species, Genome
from pypath import mapping

# Ensembl release to query, accessed anonymously on the public server.
Release = 78
account = HostAccount('ensembldb.ensembl.org', 'anonymous', '')

human = Genome(Species='human', Release=Release, account=account)

# Each record: (UniProt ID, sequence offset, residue, isoform)
positions = [('P00533', 40, 'Q', 1), ('P60520', 30, 'P', 1)]

m = mapping.Mapper()
m.load_uniprot_mappings(['ensg'], bi=True)

# One tuple per matching gene:
# (ENSG ID, gene location, exons of the canonical transcript) + the record
positions_ens = []
for p in positions:
    ensgs = m.map_name(p[0], 'uniprot', 'ensg')
    for ensg in ensgs:
        genes = human.getGenesMatching(StableId=ensg)
        for gene in genes:
            positions_ens.append(
                (ensg, gene.Location, gene.CanonicalTranscript.Exons) +
                tuple(p))

# another attempt with biopython --
# (it works, if you can map all proteins to RefSeq Gene IDs)