示例#1
0
def test_get_hgnc_id():
    """Check ``uniprot_client.get_hgnc_id`` against known identifier pairs."""
    expectations = (
        ('P07305', '4714'),
        # NRXN2: ['P58401', 'Q9P2S2'] 8009 -- both entries share one HGNC ID
        ('P58401', '8009'),
        ('Q9P2S2', '8009'),
    )
    for query, expected in expectations:
        hgnc_id = uniprot_client.get_hgnc_id(query)
        # The returned value doubles as the failure message, as before.
        assert hgnc_id == expected, hgnc_id
示例#2
0
def iter_terms(force: bool = False) -> Iterable[Term]:
    """Iterate over NCI PID terms.

    One term is yielded for each ``(uuid, cx)`` pair produced by
    ``iter_networks``. The term is named after the ``"name"`` entry of the
    network's ``networkAttributes`` aspect and receives one ``has_part``
    relationship per HGNC gene resolved from the network's ``nodes`` aspect.

    A node is resolved to a gene either by looking its name up in the HGNC
    name-to-identifier mapping or, failing that, by mapping its ``uniprot:``
    reference through ``get_hgnc_id``. Nodes whose reference starts with
    ``CHEBI:`` or ``cas:`` are skipped silently; everything else that cannot
    be resolved is reported at debug level.

    :param force: Forwarded unchanged to ``iter_networks``.
    :yields: A term per network, with its gene relationships attached.
    """
    hgnc_id_to_name = get_id_name_mapping("hgnc")
    hgnc_name_to_id = {v: k for k, v in hgnc_id_to_name.items()}

    for uuid, cx in iter_networks(force=force, use_tqdm=True):
        # If the aspect carries several "name" attributes, the last one wins.
        pathway_name = None
        for attribute in iterate_aspect(cx, "networkAttributes"):
            if attribute["n"] == "name":
                pathway_name = attribute["v"]

        term = Term(reference=Reference(prefix=PREFIX,
                                        identifier=uuid,
                                        name=pathway_name), )

        # A set de-duplicates genes that appear on more than one node.
        genes = set()
        for node in iterate_aspect(cx, "nodes"):
            # Kept separate from ``pathway_name`` so the two are never confused.
            node_name, reference = node["n"], node["r"]
            hgnc_id = hgnc_name_to_id.get(node_name)
            if hgnc_id:
                genes.add((hgnc_id, node_name))
            elif reference.startswith(("CHEBI:", "cas:")):
                pass  # these references are never mapped to genes
            elif reference.startswith("uniprot:"):
                uniprot_id = reference[len("uniprot:"):]
                hgnc_id = get_hgnc_id(uniprot_id)
                if hgnc_id is None:  # this only happens for proteins that seem to be virus related
                    # TODO reinvestigate this later
                    logger.debug(
                        "uniprot could not map %s/%s/%s to HGNC",
                        node_name,
                        reference,
                        get_gene_name(uniprot_id, web_fallback=False),
                    )
                else:
                    # Use the HGNC symbol rather than the node's own label.
                    genes.add((hgnc_id, hgnc_id_to_name[hgnc_id]))
            else:
                # Lazy %-style arguments: formatted only if debug is enabled.
                logger.debug("unmapped: %s, %s", node_name, reference)

        # Sorted so relationships are appended in a reproducible order; set
        # iteration order over strings differs between interpreter runs.
        for hgnc_id, hgnc_symbol in sorted(genes):
            term.append_relationship(has_part,
                                     Reference("hgnc", hgnc_id, hgnc_symbol))

        yield term
示例#3
0
文件: pid.py 项目: ddomingof/pyobo
def iter_terms() -> Iterable[Term]:
    """Iterate over NCI PID terms.

    One term is yielded for each ``(uuid, cx)`` pair produced by
    ``iter_networks``. The term is named after the ``'name'`` entry of the
    network's ``networkAttributes`` aspect and receives one
    ``pathway_has_part`` relationship per HGNC gene resolved from the
    network's ``nodes`` aspect.

    A node is resolved to a gene either by looking its name up in the HGNC
    name-to-identifier mapping or, failing that, by mapping its ``uniprot:``
    reference through ``get_hgnc_id``. Nodes whose reference starts with
    ``CHEBI:`` or ``cas:`` are skipped silently; everything else that cannot
    be resolved is reported at debug level.

    :yields: A term per network, with its gene relationships attached.
    """
    hgnc_id_to_name = get_id_name_mapping('hgnc')
    hgnc_name_to_id = {v: k for k, v in hgnc_id_to_name.items()}

    for uuid, cx in iter_networks(use_tqdm=True):
        # If the aspect carries several 'name' attributes, the last one wins.
        pathway_name = None
        for attribute in iterate_aspect(cx, 'networkAttributes'):
            if attribute['n'] == 'name':
                pathway_name = attribute['v']

        term = Term(reference=Reference(prefix=PREFIX,
                                        identifier=uuid,
                                        name=pathway_name), )

        # A set de-duplicates genes that appear on more than one node.
        genes = set()
        for node in iterate_aspect(cx, 'nodes'):
            # Kept separate from ``pathway_name`` so the two are never confused.
            node_name, reference = node['n'], node['r']
            hgnc_id = hgnc_name_to_id.get(node_name)
            if hgnc_id:
                genes.add((hgnc_id, node_name))
            elif reference.startswith(('CHEBI:', 'cas:')):
                pass  # these references are never mapped to genes
            elif reference.startswith('uniprot:'):
                uniprot_id = reference[len('uniprot:'):]
                hgnc_id = get_hgnc_id(uniprot_id)
                if hgnc_id is None:  # this only happens for proteins that seem to be virus related
                    # TODO reinvestigate this later
                    logger.debug(
                        'uniprot could not map %s/%s/%s to HGNC',
                        node_name,
                        reference,
                        get_gene_name(uniprot_id, web_fallback=False),
                    )
                else:
                    # Use the HGNC symbol rather than the node's own label.
                    genes.add((hgnc_id, hgnc_id_to_name[hgnc_id]))
            else:
                # Lazy %-style arguments: formatted only if debug is enabled.
                logger.debug('unmapped: %s, %s', node_name, reference)

        # Sorted so relationships are appended in a reproducible order; set
        # iteration order over strings differs between interpreter runs.
        for hgnc_id, hgnc_symbol in sorted(genes):
            term.append_relationship(pathway_has_part,
                                     Reference('hgnc', hgnc_id, hgnc_symbol))

        yield term