示例#1
0
def _handle_identifier_not_name(*, concept, prefix, identifier) -> bool:
    """Fill in the name of a concept that has an identifier but no name.

    :param concept: Mutable mapping describing the concept. On success, its
        ``NAME`` entry is set in place.
    :param prefix: Namespace prefix of the concept.
    :param identifier: Identifier of the concept inside ``prefix``.
    :return: True if ``concept[NAME]`` was set, False if no name could be
        determined (the concept is then left unchanged).
    """
    # Some namespaces are just too much of a problem at the moment to look up
    if prefix in SKIP:
        return False

    # For prefixes listed in NO_NAMES the identifier doubles as the name
    if prefix in NO_NAMES:
        concept[NAME] = concept[IDENTIFIER]
        return True

    if prefix == 'uniprot':
        mnemonic = get_mnemonic(identifier)
        # A failed lookup must not be stored as a name and reported as a
        # success; treat it like any other missing name below.
        if mnemonic is None:
            logger.warning('could not get name for %s:%s', prefix, identifier)
            return False
        concept[NAME] = mnemonic
        return True

    try:
        id_name_mapping = get_id_name_mapping(prefix)
    except (NoOboFoundry, MissingOboBuild):
        return False

    if id_name_mapping is None:
        logger.warning('could not get names for prefix %s', prefix)
        return False

    name = id_name_mapping.get(identifier)
    if name is None:
        logger.warning('could not get name for %s:%s', prefix, identifier)
        return False

    concept[NAME] = name
    return True
示例#2
0
def get_uniprot_id_names(hgnc_id: str) -> Iterable[Tuple[str, str]]:
    """Yield ``(uniprot_id, mnemonic)`` pairs for the given HGNC gene.

    The gene is looked up in ``hgnc_id_to_up`` under ``str(hgnc_id)``; the
    stored value is split on ``', '`` into individual UniProt identifiers.
    If the gene is missing, a message is written via ``tqdm.write`` and
    nothing is yielded.
    """
    try:
        joined_ids = hgnc_id_to_up[str(hgnc_id)]
    except KeyError:
        tqdm.write(f'could not find HGNC:{hgnc_id}')
    else:
        for uniprot_id in joined_ids.split(', '):
            mnemonic = uniprot_client.get_mnemonic(uniprot_id)
            yield uniprot_id, mnemonic
示例#3
0
def get_uniprot_id_names(hgnc_id: str) -> Iterable[Tuple[str, str]]:
    """Yield ``(uniprot_id, mnemonic)`` pairs for the given HGNC gene.

    The gene is looked up in ``hgnc_id_to_up`` under ``str(hgnc_id)``; the
    stored value is split on ``', '`` into individual UniProt identifiers.

    :raises KeyError: if the gene is not in ``hgnc_id_to_up``. Before
        re-raising, a diagnostic line is printed that shows the types of the
        requested key and of one example entry, to help spot key-type
        mismatches.
    """
    try:
        joined_ids = hgnc_id_to_up[str(hgnc_id)]
    except KeyError:
        # Peek at a single entry without copying the whole mapping. The
        # default keeps an empty mapping from raising here and masking the
        # KeyError that callers expect.
        example = next(iter(hgnc_id_to_up.items()), None)
        if example is None:
            print(
                f'could not find {hgnc_id} ({type(hgnc_id)}) in dict. '
                f'The dict is empty'
            )
        else:
            _k, _v = example
            print(
                f'could not find {hgnc_id} ({type(hgnc_id)}) in dict. '
                f'Example: {_k} ({type(_k)}), {_v} ({type(_v)})'
            )
        raise

    for _uniprot_id in joined_ids.split(', '):
        yield _uniprot_id, uniprot_client.get_mnemonic(_uniprot_id)
示例#4
0
def _process_interactor(s: str) -> Optional[Tuple[str, str, Optional[str]]]:
    """Convert an interactor string into a ``(prefix, identifier, name)`` triple.

    The string is dispatched on its leading ``<source>:`` tag:

    - ``uniprotkb:`` becomes an ``ncbigene`` entry when ``get_entrez_id``
      yields a truthy result (any exception from it is treated as "no
      result"), otherwise a ``uniprot`` entry named by its mnemonic.
    - ``chebi:"CHEBI:`` becomes a ``chebi`` entry; the last character of the
      string is dropped.
    - ``chembl target:`` becomes a ``chembl.target`` entry without a name;
      the last character of the string is dropped.
    - ``intact:`` is mapped to ``complexportal`` or ``reactome`` when a
      mapping exists; otherwise it is counted in ``_unhandled`` and returned
      as an ``intact`` entry.
    - ``intenz:`` becomes an ``eccode`` entry without a name.

    Anything else is counted in ``_unhandled`` under the text before its first
    colon, logged once per distinct string, and ``None`` is returned.
    """
    uniprot_tag = 'uniprotkb:'
    if s.startswith(uniprot_tag):
        accession = s[len(uniprot_tag):]
        try:
            entrez_id = get_entrez_id(accession)
        except Exception:
            entrez_id = None
        if not entrez_id:
            return 'uniprot', accession, get_mnemonic(accession)
        return 'ncbigene', entrez_id, pyobo.get_name('ncbigene', entrez_id)

    chebi_tag = 'chebi:"CHEBI:'
    if s.startswith(chebi_tag):
        chebi_local_id = s[len(chebi_tag):-1]
        chebi_name = pyobo.get_name('chebi', chebi_local_id)
        return 'chebi', chebi_local_id, chebi_name

    chembl_tag = 'chembl target:'
    if s.startswith(chembl_tag):
        return 'chembl.target', s[len(chembl_tag):-1], None

    intact_tag = 'intact:'
    if s.startswith(intact_tag):
        intact_id = s[len(intact_tag):]

        # Prefer a ComplexPortal mapping, then fall back to Reactome
        complexportal_id = _map_complexportal(intact_id)
        if complexportal_id is not None:
            return 'complexportal', complexportal_id, None

        reactome_id = _map_reactome(intact_id)
        if reactome_id is not None:
            return 'reactome', reactome_id, None

        _unhandled['intact'] += 1
        logger.debug('could not find complexportal/reactome mapping for %s:%s',
                     'intact', intact_id)
        return 'intact', intact_id, None

    intenz_tag = 'intenz:'
    if s.startswith(intenz_tag):
        return 'eccode', s[len(intenz_tag):], None

    # Snapshot of a Counter left here by the original author -- presumably
    # the tallies of prefixes seen in the data at some point (TODO confirm):
    #
    # Counter({'chebi': 9534,
    #      'ensembl': 3156,
    #      'refseq': 444,
    #      'ensemblgenomes': 439,
    #      'ddbj/embl/genbank': 204,
    #      'wwpdb': 163,
    #      'matrixdb': 102,
    #      'reactome': 87,
    #      'intenz': 43,
    #      'signor': 15,
    #      'chembl target': 11,
    #      'dip': 4,
    #      'entrezgene/locuslink': 2,
    #      'protein ontology': 2,
    #      'emdb': 2})
    unknown_tag = s.split(':')[0]
    _unhandled[unknown_tag] += 1
    if s not in _logged_unhandled:
        # Warn only once per distinct string
        logger.warning('unhandled identifier: %s', s)
        _logged_unhandled.add(s)
    return None
示例#5
0
def get_name(prefix: str, identifier: str) -> Optional[str]:
    """Look up the name of the entity ``prefix:identifier``.

    UniProt entries are resolved through protmapper's ``get_mnemonic``. For
    every other prefix the identifier is first converted with
    ``get_primary_identifier`` and then looked up in the prefix's id-to-name
    mapping.

    :return: The name, or None if the prefix has no (non-empty) mapping or the
        identifier is not in it.
    """
    if prefix != 'uniprot':
        try:
            names_by_id = get_id_name_mapping(prefix)
        except NoOboFoundry:
            names_by_id = None

        if names_by_id:
            canonical_id = get_primary_identifier(prefix, identifier)
            return names_by_id.get(canonical_id)

        logger.warning('unable to look up names for prefix %s', prefix)
        return None

    # Imported lazily, only when a UniProt lookup is actually requested
    from protmapper import uniprot_client
    return uniprot_client.get_mnemonic(identifier)
示例#6
0
def _handle_identifier_not_name(
    *,
    concept,
    prefix,
    identifier,
    skip_namespaces: Optional[Collection[str]] = None,
) -> bool:
    """Fill in the name of a concept that has an identifier but no name.

    :param concept: Mutable mapping describing the concept. On success, its
        ``NAME`` entry is set in place.
    :param prefix: Namespace prefix of the concept.
    :param identifier: Identifier of the concept inside ``prefix``.
    :param skip_namespaces: Optional prefixes for which no lookup is attempted;
        such concepts are left untouched and reported as handled (True).
    :return: True if the concept was handled, False if no name could be
        determined.
    """
    # Some namespaces are just too much of a problem at the moment to look up
    if prefix in SKIP:
        return False
    if skip_namespaces and prefix in skip_namespaces:
        return True

    # For prefixes listed in NO_NAMES the identifier doubles as the name
    if prefix in NO_NAMES:
        concept[NAME] = concept[IDENTIFIER]
        return True

    if prefix == 'uniprot':
        mnemonic = get_mnemonic(identifier)
        # A failed lookup must not be stored as a name and reported as a
        # success; treat it like any other missing name below.
        if mnemonic is None:
            logger.warning('could not get name for curie %s:%s', prefix,
                           identifier)
            return False
        concept[NAME] = mnemonic
        return True

    try:
        id_name_mapping = pyobo.api.names.get_id_name_mapping(prefix)
    except NoBuild:
        return False

    if id_name_mapping is None:
        logger.warning('could not get names for prefix "%s"', prefix)
        return False

    name = id_name_mapping.get(identifier)
    if name is None:
        logger.warning('could not get name for curie %s:%s', prefix,
                       identifier)
        return False

    concept[NAME] = name
    return True
示例#7
0
def _handle_name_and_not_identifier(
    *,
    concept,
    prefix,
    name,
    node=None,
    skip_namespaces: Optional[Collection[str]] = None,
) -> bool:
    """Fill in the identifier of a concept that has a name but no identifier.

    :param concept: Mutable mapping describing the concept; updated in place.
    :param prefix: Namespace prefix of the concept.
    :param name: Name of the concept inside ``prefix``.
    :param node: Optional mapping for the enclosing node. For the ``bel``
        prefix, its ``KIND`` entry selects the modification mapping to use.
    :param skip_namespaces: Optional prefixes for which no lookup is attempted;
        such concepts are left untouched and reported as handled (True).
    :return: True if the concept was handled, False otherwise.
    :raises ValueError: for a ``bel`` concept whose node has a ``KIND`` that
        cannot be resolved through the protein/gene modification mappings.
    """
    # An explicit remapping for this (prefix, name) pair wins over everything
    new_prefix, new_identifier, new_name = _get_name_remapping(prefix, name)
    if new_prefix:
        concept[NAMESPACE] = new_prefix
        concept[IDENTIFIER] = new_identifier
        concept[NAME] = new_name
        return True

    # Some namespaces are just too much of a problem at the moment to look up
    if prefix in SKIP:
        return False
    if skip_namespaces and prefix in skip_namespaces:
        return True

    concept[NAMESPACE] = prefix

    # For prefixes listed in NO_NAMES the name doubles as the identifier
    if prefix in NO_NAMES:
        concept[IDENTIFIER] = name
        return True

    if prefix == 'bel':
        if node is not None and KIND in node:
            kind = node[KIND]
            if kind == PMOD and name in pmod_mappings:
                # the 0th position xref is the preferred one (usually GO)
                replacement = pmod_mappings[name]['xrefs'][0]
            elif kind == GMOD and name in gmod_mappings:
                replacement = gmod_mappings[name]['xrefs'][0]
            else:
                raise ValueError(f'invalid kind: {kind}')
        elif name in activity_mapping:
            replacement = activity_mapping[name]
        elif name in compartment_mapping:
            replacement = compartment_mapping[name]
        else:
            logger.warning('could not figure out how to map bel ! "%s"', name)
            return False

        for key in (NAMESPACE, IDENTIFIER, NAME):
            concept[key] = replacement[key]
        return True

    if prefix == 'uniprot':
        # First interpret the given name as a mnemonic...
        uniprot_id = get_id_from_mnemonic(name)
        if uniprot_id is not None:
            concept[IDENTIFIER] = uniprot_id
            return True

        # ...then as an identifier that was put in the name slot
        mnemonic = get_mnemonic(name, web_fallback=False)
        if mnemonic is None:
            logger.warning('could not interpret uniprot name: "%s"', name)
            return False

        concept[IDENTIFIER] = name
        concept[NAME] = mnemonic
        return True

    try:
        name_to_id = pyobo.api.names.get_name_id_mapping(prefix)
    except NoBuild as e:
        logger.warning('could not get namespace %s - %s', prefix, e)
        return False

    if name_to_id is None:
        logger.warning('unhandled namespace in %s ! %s', prefix, name)
        return False

    looked_up_id = name_to_id.get(name)
    if looked_up_id is None:
        logger.warning('could not find name "%s" in namespace "%s"', name,
                       prefix)
        return False

    concept[IDENTIFIER] = looked_up_id
    concept[NAME] = name
    return True
示例#8
0
def test_get_mnemonic():
    """Check that UniProt ID Q02750 resolves to the mnemonic MP2K1_HUMAN."""
    expected = 'MP2K1_HUMAN'
    assert uniprot_client.get_mnemonic('Q02750') == expected
示例#9
0
    def get_psp_mapping(self, orig_id, query_id, gene_name, res, pos,
                        query_pos, mapping_code):
        """
        Map a site through PhosphoSitePlus and check it against UniProt.

        The phosphosite_client is queried with `query_id`, `res` and
        `query_pos`. A returned mapping is then checked in three steps:

        1. The mapped ID must have a UniProt mnemonic (looked up without web
           fallback), because PSP sometimes returns non-UniProt IDs such as
           NP_001184222. If it has none, a MappedSite with the error code
           `PSP_MAPPED_ID_NOT_UP` is returned.
        2. The mapped residue and position are verified with
           `uniprot_client.verify_location`. If that call raises, a
           MappedSite with the error code `UNIPROT_HTTP_NOT_FOUND`,
           `UNIPROT_HTTP_OTHER` or `UNIPROT_OTHER` is returned.
        3. If the location is valid, the PSP position is used and the site is
           described by `mapping_code`. Otherwise the position is recomputed
           with `ProtMapper.map_peptide` from the PSP motif, and the site is
           described as `REMAPPED_FROM_PSP_SEQUENCE`.

        Only a site that passes all three steps is stored in `self._cache`,
        under the key `(orig_id, res, pos)`.

        Parameters
        ----------
        orig_id : str
            Original Uniprot ID of the protein to be mapped.
        query_id : str
            Uniprot ID of the protein being queried for sites. This may differ
            from `orig_id` if the orthologous mouse or rat protein is being
            checked for sites.
        gene_name : str
            Gene name of the protein.
        res : str
            Residue of the site to be mapped.
        pos : str
            Position of the site to be mapped.
        query_pos : str
            Position being queried for a mapping. This differs from `pos`
            when off-by-one (methionine) errors are being checked.
        mapping_code : str
            Description applied to a mapping whose position is valid as
            returned, e.g. `INFERRED_ALTERNATIVE_ISOFORM`,
            `INFERRED_MOUSE_SITE`, etc.

        Returns
        -------
        MappedSite or None
            None if PSP has no mapping or the peptide re-mapping fails;
            otherwise a MappedSite holding either an error code or the
            mapping.
        """
        psp_hit = phosphosite_client.map_to_human_site(query_id, res,
                                                       query_pos)
        if psp_hit is None:
            # Nothing known to PSP for this query
            return None

        human_pos = psp_hit.mapped_pos

        # PSP sometimes returns a non-UP ID like NP_001184222; an ID without
        # a mnemonic is treated as not being a UniProt ID
        has_mnemonic = uniprot_client.get_mnemonic(psp_hit.mapped_id,
                                                   web_fallback=False)
        if not has_mnemonic:
            return MappedSite(orig_id, None, res, pos,
                              error_code='PSP_MAPPED_ID_NOT_UP')

        # From here on the ID is supposed to be a valid UniProt ID
        error_code = None
        try:
            site_valid = uniprot_client.verify_location(
                psp_hit.mapped_id, psp_hit.mapped_res, psp_hit.mapped_pos)
        except HTTPError as ex:
            if ex.response.status_code == 404:
                error_code = 'UNIPROT_HTTP_NOT_FOUND'
            else:
                error_code = 'UNIPROT_HTTP_OTHER'
        except Exception as ex:
            error_code = 'UNIPROT_OTHER'
            logger.error(ex)

        if error_code is not None:
            # Verification itself failed: report the error code, with the
            # second MappedSite argument left as None rather than True/False
            return MappedSite(orig_id, None, res, pos, error_code=error_code)

        if site_valid:
            # The position reported by PSP can be used as is
            final_pos = human_pos
            description = mapping_code
        else:
            # Otherwise try to locate the PSP motif in the sequence instead
            zero_based_pos = ProtMapper.map_peptide(orig_id, psp_hit.motif,
                                                    psp_hit.respos)
            if zero_based_pos is None:
                # If the re-mapping fails, we give up
                return None
            final_pos = str(zero_based_pos + 1)  # Switch to 1-indexed
            description = 'REMAPPED_FROM_PSP_SEQUENCE'

        mapped_site = MappedSite(orig_id, False, res, pos,
                                 mapped_id=psp_hit.mapped_id,
                                 mapped_res=psp_hit.mapped_res,
                                 mapped_pos=final_pos,
                                 description=description,
                                 gene_name=gene_name)
        self._cache[(orig_id, res, pos)] = mapped_site
        return mapped_site
示例#10
0
def _add_my_row(graph: BELGraph, row) -> None:
    """Add the edges described by one row to the graph.

    One edge is added per PubMed identifier in the row, and that identifier
    is used as the edge's citation. A row whose relation is not handled below
    adds nothing.

    :param graph: The BEL graph to add edges to.
    :param row: Mapping with the keys ``relation``, ``source`` and ``target``
        (both used as UniProt identifiers) and ``pubmed_ids`` (identifiers
        joined by ``|``).
    """
    relation = row['relation']
    source_uniprot_id = row['source']
    target_uniprot_id = row['target']
    pubmed_ids = row['pubmed_ids'].split('|')

    evidence = 'From intact'

    # The nodes do not depend on the citation, so build them once
    source_name = get_mnemonic(source_uniprot_id)
    target_name = get_mnemonic(target_uniprot_id)
    source = pybel.dsl.Protein(
        namespace='uniprot',
        identifier=source_uniprot_id,
        name=source_name,
    )
    target = pybel.dsl.Protein(
        namespace='uniprot',
        identifier=target_uniprot_id,
        name=target_name,
    )

    # NOTE(review): 'ubiqutination' and 'degratation' look misspelled. They
    # are kept as-is because they must match the values in the incoming data;
    # confirm against the data source.
    for pubmed_id in pubmed_ids:
        if relation == 'deubiquitination':
            target_ub = target.with_variants(
                pybel.dsl.ProteinModification('Ub'))
            graph.add_decreases(
                source,
                target_ub,
                citation=pubmed_id,
                evidence=evidence,
            )
        elif relation == 'ubiqutination':
            target_ub = target.with_variants(
                pybel.dsl.ProteinModification('Ub'))
            graph.add_increases(
                source,
                target_ub,
                citation=pubmed_id,
                evidence=evidence,
            )
        elif relation == 'degratation':
            graph.add_decreases(
                source,
                target,
                citation=pubmed_id,
                evidence=evidence,
            )
        elif relation == 'activates':
            graph.add_increases(
                source,
                target,
                citation=pubmed_id,
                evidence=evidence,
                object_modifier=pybel.dsl.activity(),
            )
        elif relation == 'co-expressed':
            # Co-expression is stated between the RNAs, not the proteins
            graph.add_correlation(
                pybel.dsl.Rna(
                    namespace='uniprot',
                    identifier=source_uniprot_id,
                    name=source_name,
                ),
                pybel.dsl.Rna(
                    namespace='uniprot',
                    identifier=target_uniprot_id,
                    name=target_name,
                ),
                citation=pubmed_id,
                evidence=evidence,
                annotations=dict(cell_line={'HEK2': True}),
            )