def append_gilda_predictions(
    prefix: str,
    target_prefixes: Union[str, Iterable[str]],
    provenance: str,
    relation: str = "skos:exactMatch",
    custom_filter: Optional[CMapping] = None,
    unnamed: Optional[Iterable[str]] = None,
    identifiers_are_names: bool = False,
) -> None:
    """Generate gilda-based mappings and append them to the Biomappings predictions.tsv file.

    :param prefix: Prefix of the source vocabulary.
    :param target_prefixes: One or several target vocabulary prefixes.
    :param provenance: Provenance text, usually produced by
        ``biomappings.utils.get_script_url(__file__)``.
    :param relation: Mapping relation to record. Defaults to ``skos:exactMatch``.
    :param custom_filter: Triple-nested mapping of source prefix -> target
        prefix -> source id -> target id; any combination present here is
        dropped from the output.
    :param unnamed: Optional prefixes whose identifiers should be treated as
        names (e.g., CCLE, FPLX).
    :param identifiers_are_names: Treat the source prefix's identifiers as names.
    """
    grounder = get_grounder(target_prefixes, unnamed=unnamed)
    # Build the lazy stream of candidate mappings, then filter it down.
    stream = iter_prediction_tuples(
        prefix,
        relation=relation,
        grounder=grounder,
        provenance=provenance,
        identifiers_are_names=identifiers_are_names,
    )
    if custom_filter is not None:
        stream = filter_custom(stream, custom_filter)
    stream = filter_pyobo(stream, prefix, target_prefixes)
    # Materialize in canonical order and write out.
    append_prediction_tuples(sorted(stream, key=_key))
def dump_predictions():
    """Append PR -> UniProt chain lexical mapping predictions to Biomappings.

    Builds one prediction row per Protein Ontology identifier in
    ``matches_per_id``, keeping only the top-scoring match for each, and
    appends the rows (deduplicated) via ``append_prediction_tuples``.
    """
    # Row layout: source prefix, source identifier, source name, relation,
    # target prefix, target identifier, target name, type, confidence, source.
    source_prefix = 'pr'
    target_prefix = 'uniprot.chain'
    relation = 'skos:exactMatch'
    source = (
        'https://github.com/indralab/gilda/blob/master/scripts/'
        'generate_uniprot_chain_proonto_mappings.py'
    )
    match_type = 'lexical'
    # Protein Ontology graph, used to resolve human-readable names for each PR id.
    pro = obonet.read_obo(PROONTO_OBO)
    # NOTE(review): ``matches_per_id`` is not defined inside this function --
    # presumably a module-level mapping populated earlier in the script;
    # confirm against the full file before reuse.
    rows = [
        (
            source_prefix,
            pro_id,
            pro.nodes[pro_id]['name'],
            relation,
            target_prefix,
            matches[0].term.id,  # top-ranked match only
            matches[0].term.entry_name,
            match_type,
            0.8,  # fixed confidence for these lexical matches
            source,
        )
        for pro_id, matches in matches_per_id.items()
    ]
    append_prediction_tuples(rows, deduplicate=True)
def iter_gilda_prediction_tuples(prefix: str, relation: str) -> Iterable[PredictionTuple]:
    """Yield gilda-grounded prediction tuples for every named entry in a prefix."""
    provenance = get_script_url(__file__)
    names = pyobo.get_id_name_mapping(prefix)
    for identifier, name in tqdm(names.items(), desc=f'Mapping {prefix}'):
        for match in gilda.ground(name):
            yield PredictionTuple(
                prefix,
                identifier,
                name,
                relation,
                match.term.db.lower(),
                match.term.id,
                match.term.entry_name,
                'lexical',
                match.score,
                provenance,
            )


if __name__ == '__main__':
    # Collect predictions per source prefix, each group sorted by
    # (source prefix, source name), then append them all at once.
    rows = []
    for prefix in ['reactome', 'wikipathways']:
        rows.extend(
            sorted(
                iter_gilda_prediction_tuples(prefix, 'speciesSpecific'),
                key=lambda row: (row[0], row[2]),
            )
        )
    append_prediction_tuples(rows)
# NOTE(review): this chunk appears to be the tail of a generator function --
# the enclosing ``def`` line (evidently ``get_mappings()``, which the
# ``__main__`` guard below calls) is outside the visible source, as are the
# ``url`` provenance value and the mesh/hgnc client imports used below.
# Confirm against the full file.

# Constants shared by every emitted prediction row.
mapping_type = "lexical"  # prediction method
match_type = "skos:exactMatch"  # mapping relation
confidence = 0.999  # fixed confidence for these name-derived matches
# Scan all MeSH names for ones matching the protein-name pattern.
for mesh_name, mesh_id in mesh_client.mesh_name_to_id.items():
    match = MESH_PROTEIN_RE.match(mesh_name)
    if not match:
        continue
    # First capture group of MESH_PROTEIN_RE holds the gene symbol
    # embedded in the MeSH name.
    gene_name = match.groups()[0]
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        continue  # not a recognized HGNC gene symbol
    uniprot_id = hgnc_client.get_uniprot_id(hgnc_id)
    # Skip genes with no UniProt mapping or with an ambiguous
    # comma-separated multi-protein mapping.
    if not uniprot_id or "," in uniprot_id:
        continue
    yield PredictionTuple(
        "mesh",
        mesh_id,
        mesh_name,
        match_type,
        "uniprot",
        uniprot_id,
        gene_name,
        mapping_type,
        confidence,
        url,
    )


if __name__ == "__main__":
    append_prediction_tuples(get_mappings())
from typing import Iterable

from biomappings.resources import PredictionTuple, append_prediction_tuples
from biomappings.utils import get_script_url


def iterate_kegg_matches() -> Iterable[PredictionTuple]:
    """Iterate over predictions from KEGG Pathways to GO and MeSH.

    Grounds each KEGG pathway name with gilda and yields a
    ``PredictionTuple`` for every match landing in GO or MeSH.
    """
    provenance = get_script_url(__file__)
    id_name_mapping = ensure_list_pathways()
    for identifier, name in tqdm(id_name_mapping.items(), desc='Mapping KEGG Pathways'):
        for scored_match in gilda.ground(name):
            # Only keep groundings into the target vocabularies.
            if scored_match.term.db.lower() not in {'go', 'mesh'}:
                continue
            # Fix: yield a PredictionTuple, as the return annotation promises,
            # instead of a bare tuple. PredictionTuple is constructed the same
            # way elsewhere in this codebase and unpacks like a plain tuple,
            # so tuple-consuming callers are unaffected.
            yield PredictionTuple(
                'kegg.pathway',
                identifier,
                name,
                'skos:exactMatch',
                scored_match.term.db.lower(),
                scored_match.term.id,
                scored_match.term.entry_name,
                'lexical',
                scored_match.score,
                provenance,
            )


if __name__ == '__main__':
    append_prediction_tuples(iterate_kegg_matches())