示例#1
0
 def __init__(self, filename=None):
     """Create the HGNC service and populate the mapper.

     :param filename: optional value handed to ``self.read_csv``. When it
         is ``None`` the data is fetched with ``self.load_all_hgnc()`` and
         converted into ``self.df`` by ``self.build_dataframe()``.
     """
     self._hgnc_service = HGNC()
     # Identity test: ``None`` is a singleton and ``==`` can be overridden
     # by the argument's own ``__eq__``.
     if filename is None:
         self.alldata = self.load_all_hgnc()
         self.df = self.build_dataframe()
     else:
         self.read_csv(filename)
示例#2
0
 def _lookup_hgnc_id(self):
     """Populate this object from an HGNC lookup of ``self.external_id``.

     The HGNC service is queried on the ``hgnc_id`` field.

     * Exactly one hit: ``self.name`` and ``self.description`` are set from
       the record's ``symbol`` and ``name``; when ``self.get_synonyms`` is
       truthy the record's ``alias_symbol`` entries are appended to
       ``self.synonyms``.
     * No hit: ``self.error`` receives an explanatory message.
     * Any other hit count leaves the object unchanged.

     :return: ``self``, so that calls can be chained.
     """
     response = HGNC().fetch('hgnc_id', self.external_id)['response']
     hits = response['numFound']
     if hits == 1:
         record = response['docs'][0]
         self.name = record['symbol']
         self.description = record['name']
         # Get synonyms if requested.
         if self.get_synonyms:
             # Tolerate records without an 'alias_symbol' entry instead of
             # failing with KeyError; such a record simply adds no synonyms.
             self.synonyms.extend(record.get('alias_symbol', []))
     elif hits == 0:
         self.error = "No results found when querying HGNC for {}".format(
             self.external_id)
     return self
示例#3
0
 def __init__(self, filename=None):
     """Create the HGNC service and populate the mapper.

     :param filename: optional value handed to ``self.read_csv``. When it
         is ``None`` the data is fetched with ``self.load_all_hgnc()`` and
         converted into ``self.df`` by ``self.build_dataframe()``.
     """
     self._hgnc_service = HGNC()
     # Identity test: ``None`` is a singleton and ``==`` can be overridden
     # by the argument's own ``__eq__``.
     if filename is None:
         self.alldata = self.load_all_hgnc()
         self.df = self.build_dataframe()
     else:
         self.read_csv(filename)
示例#4
0
class HGNCMapper(object):
    """Expose the HGNC cross-reference mapping as a pandas DataFrame.

    On construction the mapping is either fetched through the HGNC service
    (``filename`` is ``None``) or loaded through ``self.read_csv``.
    """

    # Database names as plain strings; not referenced by the methods visible
    # in this class body (presumably the link targets HGNC offers -- confirm).
    hgnc_dblink = ['EC', 'Ensembl', 'EntrezGene', 'GDB', 'GENATLAS',
            'GeneCards', 'GeneTests', 'GoPubmed', 'H-InvDB', 'HCDM', 'HCOP',
            'HGNC', 'HORDE', 'IMGT_GENE_DB', 'INTERFIL', 'IUPHAR', 'KZNF',
            'MEROPS', 'Nucleotide', 'OMIM', 'PubMed', 'RefSeq', 'Rfam',
            'Treefam', 'UniProt', 'Vega', 'miRNA', 'snoRNABase']

    def __init__(self, filename=None):
        """Create the HGNC service and populate the mapper.

        :param filename: optional value handed to ``self.read_csv``; when
            ``None`` the data is fetched and stored in ``self.alldata`` and
            ``self.df``.
        """
        self._hgnc_service = HGNC()
        if filename is None:
            self.alldata = self.load_all_hgnc()
            self.df = self.build_dataframe()
        else:
            self.read_csv(filename)

    def load_all_hgnc(self):
        """Return the result of the service's ``mapping_all()`` call.

        Keys are unique gene names.
        """
        print("Fetching the data from HGNC first. May take a few minutes")
        alldata = self._hgnc_service.mapping_all()
        print("done")
        return alldata

    def build_dataframe(self):
        """Flatten ``self.alldata`` into a DataFrame.

        Each outer key of ``self.alldata`` becomes a row. Each inner key
        becomes a column named ``<inner key>__HGNC_mapping`` that holds the
        entry's ``'xkey'`` value.

        :return: the DataFrame (the constructor stores it in ``self.df``).
        """
        # simplify to get a dictionary of dictionary
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        data = {
            gene: {db: entry['xkey'] for db, entry in xrefs.items()}
            for gene, xrefs in self.alldata.items()
        }
        dfdata = pd.DataFrame(data).transpose()
        # rename to tag with "HGNC"
        dfdata.columns = [this + "__HGNC_mapping" for this in dfdata.columns]
        print("a dataframe was built using HGNC data set and saved in attributes  self._df_hgnc")
        return dfdata
示例#5
0
def add_sequence_to_nodes(n: str, d: Dict[str, Any]):
    """
    Looks up the UniProt IDs of a node's protein through HGNC and stores the
    FASTA sequence of each one on the node.

    ``d["protein_id"]`` is queried against the HGNC ``symbol`` field. The
    ``uniprot_ids`` of the first returned record are written to
    ``d["uniprot_ids"]``, and for every such ID the sequence returned by
    UniProt is written to ``d["sequence_<uniprot id>"]``. ``d`` is modified
    in place and nothing is returned.

    :param n: Graph node (not used by the lookup itself).
    :type n: str
    :param d: Graph attribute dictionary; must contain ``"protein_id"``.
    :type d: Dict[str, Any]
    :raises IndexError: if HGNC returns no record for the symbol.
    :raises KeyError: if the first record has no ``uniprot_ids`` entry.
    """
    h = HGNC(verbose=False)
    u = UniProt(verbose=False)

    d["uniprot_ids"] = h.fetch(
        "symbol", d["protein_id"])["response"]["docs"][0]["uniprot_ids"]

    # Todo these API calls should probably be batched
    # Todo mapping with bioservices to support other protein IDs?

    # Loop variable deliberately not called ``id`` to avoid shadowing the builtin.
    for uniprot_id in d["uniprot_ids"]:
        d[f"sequence_{uniprot_id}"] = u.get_fasta_sequence(uniprot_id)
示例#6
0
def kegg_to_hugo(genes, species='hsa'):
    """
    Converts all KEGG names to HGNC

    Every gene is searched in HGNC after its ``<species>:`` prefix has been
    removed. The ``symbol`` of the first returned record is used.

    Parameters
    ----------
    genes : list
        KEGG gene identifiers, optionally prefixed with ``<species>:``.
    species : str
        KEGG organism code whose prefix is removed before searching.

    Returns
    -------
    hugo_dict : dict
        Maps each input identifier to the HGNC symbol found for it.
    not_found : set
        Input identifiers for which no symbol could be obtained.
    """
    prefix = species + ':'
    hugo = HGNC(verbose=True)
    hugo_dict = {}
    not_found = set()
    for gene in genes:
        # str.lstrip() removes any leading run of the given *characters*
        # ('hsa:sash1' would become '1'), so drop the prefix explicitly.
        query = gene[len(prefix):] if gene.startswith(prefix) else gene
        mapping = hugo.search(query)
        if 'response' in mapping:
            response = mapping['response']
            if response.get('numFound', 0) > 0:
                first_hit = response['docs'][0]
                if 'symbol' in first_hit:
                    hugo_dict[gene] = first_hit['symbol']
                    continue
        # Anything that did not yield a symbol is reported, so every input
        # ends up in exactly one of the two returned containers.
        not_found.add(gene)
    # A set never equals 0; test emptiness so the report is only printed
    # when something is actually missing.
    if not_found:
        print("{} not found after HGNC mapping".format(len(not_found)))
        print("{} ".format(not_found))
    return hugo_dict, not_found
示例#7
0
def kegg_to_hugo(genes, species='hsa'):
    """
    Converts all KEGG names to HGNC

    Every gene is searched in HGNC after its ``<species>:`` prefix has been
    removed. The ``symbol`` of the first returned record is used.

    Parameters
    ----------
    genes : list
        KEGG gene identifiers, optionally prefixed with ``<species>:``.
    species : str
        KEGG organism code whose prefix is removed before searching.

    Returns
    -------
    hugo_dict : dict
        Maps each input identifier to the HGNC symbol found for it.
    not_found : set
        Input identifiers for which no symbol could be obtained.
    """
    prefix = species + ':'
    hugo = HGNC(verbose=True)
    hugo_dict = {}
    not_found = set()
    for gene in genes:
        # str.lstrip() removes any leading run of the given *characters*
        # ('hsa:sash1' would become '1'), so drop the prefix explicitly.
        query = gene[len(prefix):] if gene.startswith(prefix) else gene
        mapping = hugo.search(query)
        if 'response' in mapping:
            response = mapping['response']
            if response.get('numFound', 0) > 0:
                first_hit = response['docs'][0]
                if 'symbol' in first_hit:
                    hugo_dict[gene] = first_hit['symbol']
                    continue
        # Anything that did not yield a symbol is reported, so every input
        # ends up in exactly one of the two returned containers.
        not_found.add(gene)
    # A set never equals 0; test emptiness so the report is only printed
    # when something is actually missing.
    if not_found:
        print("{} not found after HGNC mapping".format(len(not_found)))
        print("{} ".format(not_found))
    return hugo_dict, not_found
示例#8
0
    def __init__(self, verbosity="INFO"):
        """Instantiate the service wrappers used by the mapper.

        Each service is announced on the logger before it is created.

        :param verbosity: forwarded to the parent initialiser as ``level``.
        """
        super(Mapper, self).__init__(level=verbosity)

        announce = self.logging.info
        announce("Initialising the services")

        # UniProt
        announce("... uniprots")
        self._uniprot_service = UniProt()

        # KEGG (created with verbose=False)
        announce("... KEGG")
        self._kegg_service = KeggParser(verbose=False)

        # HGNC
        announce("... HGNC")
        self._hgnc_service = HGNC()

        # UniChem
        announce("... UniChem")
        self._unichem_service = UniChem()

        # BioDBNet
        announce("...BioDBNet")
        self._biodbnet = BioDBNet()
示例#9
0
class HGNCMapper(object):
    """Expose the HGNC cross-reference mapping as a pandas DataFrame.

    On construction the mapping is either fetched through the HGNC service
    (``filename`` is ``None``) or loaded through ``self.read_csv``.
    """

    # Database names as plain strings; not referenced by the methods visible
    # in this class body (presumably the link targets HGNC offers -- confirm).
    hgnc_dblink = [
        'EC', 'Ensembl', 'EntrezGene', 'GDB', 'GENATLAS', 'GeneCards',
        'GeneTests', 'GoPubmed', 'H-InvDB', 'HCDM', 'HCOP', 'HGNC', 'HORDE',
        'IMGT_GENE_DB', 'INTERFIL', 'IUPHAR', 'KZNF', 'MEROPS', 'Nucleotide',
        'OMIM', 'PubMed', 'RefSeq', 'Rfam', 'Treefam', 'UniProt', 'Vega',
        'miRNA', 'snoRNABase'
    ]

    def __init__(self, filename=None):
        """Create the HGNC service and populate the mapper.

        :param filename: optional value handed to ``self.read_csv``; when
            ``None`` the data is fetched and stored in ``self.alldata`` and
            ``self.df``.
        """
        self._hgnc_service = HGNC()
        if filename is None:
            self.alldata = self.load_all_hgnc()
            self.df = self.build_dataframe()
        else:
            self.read_csv(filename)

    def load_all_hgnc(self):
        """Return the result of the service's ``mapping_all()`` call.

        Keys are unique gene names.
        """
        print("Fetching the data from HGNC first. May take a few minutes")
        alldata = self._hgnc_service.mapping_all()
        print("done")
        return alldata

    def build_dataframe(self):
        """Flatten ``self.alldata`` into a DataFrame.

        Each outer key of ``self.alldata`` becomes a row. Each inner key
        becomes a column named ``<inner key>__HGNC_mapping`` that holds the
        entry's ``'xkey'`` value.

        :return: the DataFrame (the constructor stores it in ``self.df``).
        """
        # simplify to get a dictionary of dictionary
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        data = {
            gene: {db: entry['xkey'] for db, entry in xrefs.items()}
            for gene, xrefs in self.alldata.items()
        }
        dfdata = pd.DataFrame(data).transpose()
        # rename to tag with "HGNC"
        dfdata.columns = [this + "__HGNC_mapping" for this in dfdata.columns]
        print(
            "a dataframe was built using HGNC data set and saved in attributes  self._df_hgnc"
        )
        return dfdata
示例#10
0
def test_hgnc():
    """Smoke-test the HGNC service: info, two fetches and several searches.

    Nothing is asserted; the test passes as long as no call raises.
    """
    service = HGNC()
    service.get_info()

    # Exact-match lookups on a given field.
    for field, term in (
        ('symbol', 'ZNF3'),
        ('alias_name', 'A-kinase anchor protein, 350kDa'),
    ):
        service.fetch(field, term)

    # Free-text search without naming a field.
    service.search('BRAF')

    # Searches on the symbol field: wildcards and boolean operators.
    for query in (
        'ZNF*',
        'ZNF?',
        'ZNF*+AND+status:Approved',
        'ZNF3+OR+ZNF12',
        'ZNF*+NOT+status:Approved',
    ):
        service.search('symbol', query)
示例#11
0
 def __init__(self):
     """Create an ``HGNC`` service with ``verbose=False`` and keep it on ``self.s``."""
     self.s = HGNC(verbose=False)
示例#12
0
class test_hgnc():
    """Tests for the XML-based HGNC service API.

    Every test carries the ``skip`` attribute.
    """

    def __init__(self):
        """Create the HGNC service shared by all tests as ``self.s``."""
        self.s = HGNC(verbose=False)

    @attr('skip')
    def test_get_xml(self):
        """A two-gene query yields two ``gene`` elements; odd input is accepted."""
        self.s.get_xml("ZAP70")
        xml = self.s.get_xml("ZAP70;INSR")
        assert len(xml.findAll("gene")) == 2
        self.s.get_xml("wrong")

    @attr('skip')
    def test_aliases(self):
        """Check the aliases of ZAP70 and exercise the other simple getters."""
        assert self.s.get_aliases("ZAP70") == [u'ZAP-70', u'STD']
        self.s.get_name("ZAP70")
        self.s.get_chromosome("ZAP70")
        self.s.get_previous_symbols("ZAP70")
        self.s.get_withdrawn_symbols("ZAP70")
        self.s.get_previous_names("ZAP70")

    @attr('skip')
    def test_xref(self):
        """Check the UniProt cross-reference key and its XML link for ZAP70."""
        assert self.s.get_xrefs("ZAP70")['UniProt']['xkey'] == 'P43403'
        assert self.s.get_xrefs("ZAP70", "xml")['UniProt']['link'] == ['http://www.uniprot.org/uniprot/P43403.xml']

    @attr('skip')
    def test_lookfor(self):
        """``lookfor`` must run without raising."""
        self.s.lookfor("ZAP70")

    @attr('skip')
    def test_mapping(self):
        """Mapping a UniProt accession must lead back to ZAP70."""
        value = "UniProt:P43403"
        res = self.s.mapping(value)
        # The comparison used to be a bare expression whose result was
        # discarded; assert it so the check is actually enforced.
        assert res[0]['xlink:title'] == "ZAP70"
示例#13
0
def test_hgnc():
    """Smoke-test the HGNC service: info, two fetches and several searches.

    Nothing is asserted; the test passes as long as no call raises.
    """
    service = HGNC()
    service.get_info()

    # Exact-match lookups on a given field.
    for field, term in (
        ("symbol", "ZNF3"),
        ("alias_name", "A-kinase anchor protein, 350kDa"),
    ):
        service.fetch(field, term)

    # Free-text search without naming a field.
    service.search("BRAF")

    # Searches on the symbol field: wildcards and boolean operators.
    for query in (
        "ZNF*",
        "ZNF?",
        "ZNF*+AND+status:Approved",
        "ZNF3+OR+ZNF12",
        "ZNF*+NOT+status:Approved",
    ):
        service.search("symbol", query)
示例#14
0
 def __init__(self):
     """Create an ``HGNC`` service with ``verbose=False`` and keep it on ``self.s``."""
     self.s = HGNC(verbose=False)
示例#15
0
class test_hgnc():
    """Tests for the XML-based HGNC service API."""

    def __init__(self):
        """Create the HGNC service shared by all tests as ``self.s``."""
        self.s = HGNC(verbose=False)

    def test_get_xml(self):
        """A two-gene query yields two ``gene`` elements; odd input is accepted."""
        self.s.get_xml("ZAP70")
        xml = self.s.get_xml("ZAP70;INSR")
        assert len(xml.findAll("gene")) == 2
        self.s.get_xml("wrong")

    def test_aliases(self):
        """Check the aliases of ZAP70 and exercise the other simple getters."""
        assert self.s.get_aliases("ZAP70") == [u'ZAP-70', u'STD']
        self.s.get_name("ZAP70")
        self.s.get_chromosome("ZAP70")
        self.s.get_previous_symbols("ZAP70")
        self.s.get_withdrawn_symbols("ZAP70")
        self.s.get_previous_names("ZAP70")

    def test_xref(self):
        """Check the UniProt cross-reference key and its XML link for ZAP70."""
        assert self.s.get_xrefs("ZAP70")['UniProt']['xkey'] == 'P43403'
        assert self.s.get_xrefs("ZAP70", "xml")['UniProt']['link'] == [
            'http://www.uniprot.org/uniprot/P43403.xml'
        ]

    def test_lookfor(self):
        """``lookfor`` must run without raising."""
        self.s.lookfor("ZAP70")

    def test_mapping(self):
        """Mapping a UniProt accession must lead back to ZAP70."""
        value = "UniProt:P43403"
        res = self.s.mapping(value)
        # The comparison used to be a bare expression whose result was
        # discarded; assert it so the check is actually enforced.
        assert res['xlink:title'] == "ZAP70"