Exemplo n.º 1
0
def append_gilda_predictions(
    prefix: str,
    target_prefixes: Union[str, Iterable[str]],
    provenance: str,
    relation: str = "skos:exactMatch",
    custom_filter: Optional[CMapping] = None,
    unnamed: Optional[Iterable[str]] = None,
    identifiers_are_names: bool = False,
) -> None:
    """Predict mappings with Gilda and append them to the Biomappings predictions.tsv file.

    :param prefix: The source prefix
    :param target_prefixes: The target prefix or prefixes
    :param provenance: The provenance text. Typically generated with ``biomappings.utils.get_script_url(__file__)``.
    :param relation: The relationship. Defaults to ``skos:exactMatch``.
    :param custom_filter: A triple nested dictionary from source prefix to target prefix to source id to target id.
        Any source prefix, target prefix, source id combinations in this dictionary will be filtered.
    :param unnamed: An optional list of prefixes whose identifiers should be considered as names (e.g., CCLE, FPLX)
    :param identifiers_are_names: The source prefix's identifiers should be considered as names
    """
    # Build a grounder restricted to the requested target vocabularies.
    grounder = get_grounder(target_prefixes, unnamed=unnamed)
    prediction_stream = iter_prediction_tuples(
        prefix,
        relation=relation,
        grounder=grounder,
        provenance=provenance,
        identifiers_are_names=identifiers_are_names,
    )
    # Apply the optional caller-supplied filter first, then drop mappings
    # already known via PyOBO, before sorting and writing out.
    if custom_filter is not None:
        prediction_stream = filter_custom(prediction_stream, custom_filter)
    prediction_stream = filter_pyobo(prediction_stream, prefix, target_prefixes)
    append_prediction_tuples(sorted(prediction_stream, key=_key))
Exemplo n.º 2
0
def dump_predictions():
    """Write PR -> UniProt chain lexical match predictions to the Biomappings store."""
    # Fields shared by every emitted row; a row is:
    # (source prefix, source identifier, source name, relation,
    #  target prefix, target identifier, target name, type, confidence, source)
    source_prefix = 'pr'
    target_prefix = 'uniprot.chain'
    relation = 'skos:exactMatch'
    source = 'https://github.com/indralab/gilda/blob/master/scripts/' \
        'generate_uniprot_chain_proonto_mappings.py'
    match_type = 'lexical'
    # Load the Protein Ontology graph to resolve source names from identifiers.
    pro = obonet.read_obo(PROONTO_OBO)
    rows = [
        (
            source_prefix,
            pro_id,
            pro.nodes[pro_id]['name'],
            relation,
            target_prefix,
            matches[0].term.id,
            matches[0].term.entry_name,
            match_type,
            0.8,
            source,
        )
        # NOTE(review): ``matches_per_id`` is a module-level name defined
        # outside this function; only the top-scoring match per id is kept.
        for pro_id, matches in matches_per_id.items()
    ]
    append_prediction_tuples(rows, deduplicate=True)

def iter_gilda_prediction_tuples(prefix: str,
                                 relation: str) -> Iterable[PredictionTuple]:
    """Iterate over prediction tuples for a given prefix."""
    provenance = get_script_url(__file__)
    # Resolve every identifier in the resource to its name, then ground each
    # name with Gilda and emit one prediction per scored match.
    names = pyobo.get_id_name_mapping(prefix)
    for entity_id, entity_name in tqdm(names.items(),
                                       desc=f'Mapping {prefix}'):
        for match in gilda.ground(entity_name):
            term = match.term
            yield PredictionTuple(
                prefix,
                entity_id,
                entity_name,
                relation,
                term.db.lower(),
                term.id,
                term.entry_name,
                'lexical',
                match.score,
                provenance,
            )


if __name__ == '__main__':
    # For each pathway resource, generate species-specific predictions sorted
    # by (source prefix, source name), then append them all in one pass.
    _sorted_batches = (
        sorted(iter_gilda_prediction_tuples(prefix, 'speciesSpecific'),
               key=lambda t: (t[0], t[2]))
        for prefix in ['reactome', 'wikipathways']
    )
    append_prediction_tuples(itt.chain.from_iterable(_sorted_batches))
    # NOTE(review): this indented fragment reads like a generator body, but
    # its enclosing ``def`` line is not visible here — presumably the
    # ``get_mappings`` generator invoked below; confirm against the original
    # script. As written (directly under the ``if __name__`` block above) the
    # module-level ``yield`` would be a syntax error.
    mapping_type = "lexical"
    match_type = "skos:exactMatch"
    confidence = 0.999
    # Scan all MeSH names for protein-style entries and map them to UniProt
    # by resolving the captured gene symbol through HGNC.
    for mesh_name, mesh_id in mesh_client.mesh_name_to_id.items():
        match = MESH_PROTEIN_RE.match(mesh_name)
        if not match:
            continue
        gene_name = match.groups()[0]
        hgnc_id = hgnc_client.get_hgnc_id(gene_name)
        if not hgnc_id:
            # Gene symbol did not resolve to an HGNC record.
            continue
        uniprot_id = hgnc_client.get_uniprot_id(hgnc_id)
        if not uniprot_id or "," in uniprot_id:
            # Skip missing or ambiguous (comma-separated) UniProt mappings.
            continue
        yield PredictionTuple(
            "mesh",
            mesh_id,
            mesh_name,
            match_type,
            "uniprot",
            uniprot_id,
            gene_name,
            mapping_type,
            confidence,
            url,  # NOTE(review): ``url`` is not defined in this fragment — presumably set alongside the missing ``def``
        )


if __name__ == "__main__":
    # Append all generated MeSH -> UniProt predictions in a single pass.
    append_prediction_tuples(get_mappings())
Exemplo n.º 5
0
from biomappings.resources import PredictionTuple, append_prediction_tuples
from biomappings.utils import get_script_url


def iterate_kegg_matches() -> Iterable[PredictionTuple]:
    """Iterate over predictions from KEGG Pathways to GO and MeSH.

    Grounds each KEGG pathway name with Gilda and yields a
    :class:`PredictionTuple` for every match into GO or MeSH.
    """
    provenance = get_script_url(__file__)
    id_name_mapping = ensure_list_pathways()
    for identifier, name in tqdm(id_name_mapping.items(),
                                 desc='Mapping KEGG Pathways'):
        for scored_match in gilda.ground(name):
            # Only keep matches into the target vocabularies.
            if scored_match.term.db.lower() not in {'go', 'mesh'}:
                continue

            # Wrap in PredictionTuple so yielded values match the declared
            # return type, consistent with iter_gilda_prediction_tuples
            # (the original yielded a bare tuple).
            yield PredictionTuple(
                'kegg.pathway',
                identifier,
                name,
                'skos:exactMatch',
                scored_match.term.db.lower(),
                scored_match.term.id,
                scored_match.term.entry_name,
                'lexical',
                scored_match.score,
                provenance,
            )


if __name__ == '__main__':
    # Append all KEGG Pathway -> GO/MeSH predictions in a single pass.
    append_prediction_tuples(iterate_kegg_matches())