def _process_concept(*, concept, node=None) -> bool:
    """Normalize a concept dictionary's namespace and map its identifier or name in place.

    :param concept: A concept dictionary. ``concept[NAMESPACE]`` is overwritten with the
     normalized prefix once normalization succeeds.
    :param node: Passed through to the name-based handler when the concept has no identifier.
    :returns: ``False`` if the namespace is ``text``/``fixme`` (case-insensitive), if the
     namespace can not be normalized, or if the handler reports failure; ``True`` otherwise.
    """
    raw_namespace = concept[NAMESPACE]
    if raw_namespace.lower() in {'text', 'fixme'}:
        return False

    prefix = normalize_prefix(raw_namespace)
    if prefix is None:
        logger.warning('could not normalize namespace %s in concept %s', raw_namespace, concept)
        return False
    concept[NAMESPACE] = prefix

    identifier, name = concept.get(IDENTIFIER), concept.get(NAME)
    if identifier:
        # An identifier takes precedence; any name that came with it is not trusted.
        mapped = _handle_identifier_not_name(
            concept=concept,
            prefix=prefix,
            identifier=identifier,
        )
    else:
        mapped = _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=name,
            node=node,
        )

    if mapped:
        _remap_by_identifier(concept)
        return True
    return False
def summarize_one(prefix: str): """Summarize the mappings.""" prefix = normalize_prefix(prefix) in_df = summary_df.loc[summary_df[TARGET_PREFIX] == prefix, [SOURCE_PREFIX, 'count']] out_df = summary_df.loc[summary_df[SOURCE_PREFIX] == prefix, [TARGET_PREFIX, 'count']] return f'''
def _process_annotations(data, add_free_annotations: bool = False):
    """Replace the annotations in ``data`` with grounded ones, keyed by namespace then identifier.

    :param data: A dictionary whose ``data[ANNOTATIONS]`` maps an annotation key to its
     names. The value is read and then replaced with ``{namespace: {identifier: polarity}}``.
    :param add_free_annotations: If true, names that were not grounded are stored in
     ``data['free_annotations']`` as ``{key: {name: polarity}}``.
    """
    grounded = []  # (Entity, polarity) pairs
    free = []  # (key, name, polarity) triples

    for category, names in data[ANNOTATIONS].items():
        if category == 'CellLine':
            efo_name_to_id = get_name_id_mapping('efo')
            # FIXME: also look names up in CLO once the CLO import is implemented
            for name, polarity in names.items():
                identifier = efo_name_to_id.get(name)
                if identifier is not None:
                    entity = Entity(namespace='efo', identifier=identifier, name=name)
                    grounded.append((entity, polarity))
                else:
                    # Keep the unmatched *name*: the identifier is always None on this
                    # path, and storing it would collapse every miss onto one None key.
                    free.append(('efo', name, polarity))
        elif category in _BEL_ANNOTATION_PREFIX_MAP:
            _process_findable_annotations(
                grounded, free, _BEL_ANNOTATION_PREFIX_MAP[category], names,
            )
        else:
            # Normalize once and reuse the result for both the test and the call.
            prefix_norm = normalize_prefix(category)
            if prefix_norm:
                _process_findable_annotations(grounded, free, prefix_norm, names)
                continue

            # Warn only the first time a given unhandled annotation key is seen.
            if category not in _UNHANDLED_ANNOTATION:
                logger.warning('unhandled annotation: %s', category)
                _UNHANDLED_ANNOTATION.add(category)
            if isinstance(names, dict):
                for name, polarity in names.items():
                    free.append((category, name, polarity))
            else:
                free.append((category, names, True))

    data[ANNOTATIONS] = defaultdict(dict)
    for entity, polarity in grounded:
        data[ANNOTATIONS][entity.namespace][entity.identifier] = polarity

    if add_free_annotations:
        data['free_annotations'] = defaultdict(dict)
        for key, name, polarity in free:
            data['free_annotations'][key][name] = polarity
def get_remapping() -> Mapping[Tuple[str, str], Tuple[str, str, str]]:
    """Build a lookup from database/identifier pairs to famplex identifiers.

    Rows whose namespace can not be normalized are left out; a debug message is logged
    for each of them.

    :returns: A dictionary keyed by ``(normalized prefix, identifier)`` whose values are
     ``('fplx', source_id, source_id)`` triples.
    """
    remapping = {}
    for namespace, identifier, famplex_id in _get_df().values:
        prefix = normalize_prefix(namespace)
        if prefix is None:
            logger.debug('could not remap %s', namespace)
            continue
        remapping[prefix, identifier] = ('fplx', famplex_id, famplex_id)
    return remapping
def _process_concept(
    *,
    concept,
    node=None,
    skip_namespaces: Optional[Collection[str]] = None,
) -> bool:
    """Normalize a concept dictionary's namespace and map its identifier or name in place.

    :param concept: A concept dictionary. ``concept[NAMESPACE]`` is overwritten with the
     normalized prefix once normalization succeeds.
    :param node: Passed through to the name-based handler.
    :param skip_namespaces: Namespaces to leave untouched. A concept whose original
     namespace is in this collection is reported as successful without being modified.
     The collection is also forwarded to both handlers.
    :returns: ``False`` if the namespace is ``text``/``fixme`` (case-insensitive), if the
     namespace can not be normalized, or if no handler succeeds; ``True`` otherwise.
    """
    raw_namespace = concept[NAMESPACE]
    if raw_namespace.lower() in {'text', 'fixme'}:
        return False
    if skip_namespaces and raw_namespace in skip_namespaces:
        return True

    prefix = normalize_prefix(raw_namespace)
    if prefix is None:
        logger.warning(
            'could not normalize namespace "%s" in concept "%s"', raw_namespace, concept,
        )
        return False
    concept[NAMESPACE] = prefix

    identifier, name = concept.get(IDENTIFIER), concept.get(NAME)
    if not identifier:
        mapped = _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=name,
            node=node,
            skip_namespaces=skip_namespaces,
        )
    else:
        # The identifier takes precedence over any name that came with it. If the
        # identifier lookup fails, retry treating the identifier as a name, just in
        # case the name was put in the identifier slot.
        mapped = _handle_identifier_not_name(
            concept=concept,
            prefix=prefix,
            identifier=identifier,
            skip_namespaces=skip_namespaces,
        ) or _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=identifier,
            node=node,
            skip_namespaces=skip_namespaces,
        )

    if mapped:
        _remap_by_identifier(concept)
        return True
    return False
def _process_annotations(data, remove_ungrounded: bool = False) -> None:
    """Process the annotations in a PyBEL edge data dictionary.

    Every annotation name is looked up with ``pyobo.ground``. Names that ground are
    written back to ``data[ANNOTATIONS]`` as ``{category: {curie: polarity}}``. Names
    that do not ground are collected separately.

    :param data: An edge data dictionary. ``data[ANNOTATIONS]`` is read and then replaced.
    :param remove_ungrounded: If false and at least one name could not be grounded, the
     ungrounded names are stored in ``data[FREE_ANNOTATIONS]`` as
     ``{category: {name: polarity}}``. If true, they are discarded.
    """
    grounded = []  # (category, Entity, polarity) triples
    ungrounded = []  # (category, name, polarity) triples

    def _ground_all(target_category, prefix, names):
        """Ground each name against ``prefix`` and file it under ``target_category``."""
        for name, polarity in names.items():
            _, identifier, _ = pyobo.ground(prefix, name)
            if identifier:
                entity = Entity(namespace=prefix, identifier=identifier, name=name)
                grounded.append((target_category, entity, polarity))
            else:
                ungrounded.append((target_category, name, polarity))

    for category, names in data[ANNOTATIONS].items():
        if category == 'CellLine':
            cell_line_prefixes = [
                'efo',
                # FIXME: add 'clo' here once the CLO import is implemented
            ]
            for name, polarity in names.items():
                g_prefix, g_identifier, g_name = pyobo.ground(cell_line_prefixes, name)
                if g_prefix and g_identifier:
                    entity = Entity(namespace=g_prefix, identifier=g_identifier, name=g_name)
                    grounded.append((category, entity, polarity))
                else:
                    ungrounded.append((category, name, polarity))
            continue

        if category in _BEL_ANNOTATION_PREFIX_MAP:
            _ground_all(
                _BEL_ANNOTATION_PREFIX_CATEGORY_MAP.get(category, category),
                _BEL_ANNOTATION_PREFIX_MAP[category],
                names,
            )
            continue

        # Normalize once and reuse the result for both the test and the grounding.
        norm_prefix = normalize_prefix(category)
        if norm_prefix:
            _ground_all(category, norm_prefix, names)
            continue

        # Warn only the first time a given unhandled annotation category is seen.
        if category not in _UNHANDLED_ANNOTATION:
            logger.warning('unhandled annotation: %s', category)
            _UNHANDLED_ANNOTATION.add(category)
        if isinstance(names, dict):
            ungrounded.extend(
                (category, name, polarity)
                for name, polarity in names.items()
            )
        else:
            ungrounded.append((category, names, True))

    grounded_annotations = defaultdict(dict)
    for category, entity, polarity in grounded:
        grounded_annotations[category][entity.curie] = polarity
    data[ANNOTATIONS] = dict(grounded_annotations)

    if not remove_ungrounded and ungrounded:
        free_annotations = defaultdict(dict)
        for category, name, polarity in ungrounded:
            free_annotations[category][name] = polarity
        data[FREE_ANNOTATIONS] = dict(free_annotations)
def summarize_one(prefix: str): """Summarize the mappings.""" prefix = normalize_prefix(prefix) in_df = summary_df.loc[summary_df['target_ns'] == prefix, ['source_ns', 'count']] out_df = summary_df.loc[summary_df['source_ns'] == prefix, ['target_ns', 'count']] return f'''