Пример #1
0
def _process_concept(*, concept, node=None) -> bool:
    """Normalize a concept dictionary's namespace and remap its contents in place.

    :param concept: A mapping holding at least the ``NAMESPACE`` key and
        optionally the ``IDENTIFIER`` and ``NAME`` keys. It is modified in place.
    :param node: Forwarded untouched to ``_handle_name_and_not_identifier``
        when the concept has no identifier.
    :return: ``False`` if the namespace is ``text``/``fixme`` (case-insensitive),
        if the namespace can not be normalized, or if the identifier/name
        handler reports failure. ``True`` otherwise.
    """
    raw_namespace = concept[NAMESPACE]
    if raw_namespace.lower() in {'text', 'fixme'}:
        return False

    prefix = normalize_prefix(raw_namespace)
    if prefix is None:
        logger.warning('could not normalize namespace %s in concept %s', raw_namespace, concept)
        return False

    # From here on the concept carries the normalized prefix.
    concept[NAMESPACE] = prefix

    identifier = concept.get(IDENTIFIER)
    if not identifier:
        handled = _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=concept.get(NAME),
            node=node,
        )
    else:
        # An identifier takes precedence: the name is ignored even if present.
        handled = _handle_identifier_not_name(
            concept=concept,
            prefix=prefix,
            identifier=identifier,
        )

    if handled:
        _remap_by_identifier(concept)
        return True
    return False
Пример #2
0
def summarize_one(prefix: str):
    """Summarize the mappings."""
    prefix = normalize_prefix(prefix)
    in_df = summary_df.loc[summary_df[TARGET_PREFIX] == prefix,
                           [SOURCE_PREFIX, 'count']]
    out_df = summary_df.loc[summary_df[SOURCE_PREFIX] == prefix,
                            [TARGET_PREFIX, 'count']]
    return f'''
Пример #3
0
def _process_annotations(data, add_free_annotations: bool = False):
    """Ground the annotations of an edge data dictionary in place.

    Every annotation category found under ``data[ANNOTATIONS]`` is handled by
    the first rule that matches:

    1. ``CellLine`` names are looked up in the EFO name-to-identifier mapping.
    2. Categories listed in ``_BEL_ANNOTATION_PREFIX_MAP`` are handed to
       ``_process_findable_annotations`` with the mapped prefix.
    3. Categories that :func:`normalize_prefix` accepts are handed to
       ``_process_findable_annotations`` with the normalized prefix.
    4. Anything else is logged (once per category) and kept as a free
       annotation.

    Afterwards ``data[ANNOTATIONS]`` is replaced with a nested mapping
    ``namespace -> identifier -> polarity`` that only holds grounded entries.

    :param data: The edge data dictionary; it is modified in place.
    :param add_free_annotations: If true, the entries that could not be
        grounded are stored under ``data['free_annotations']`` as
        ``prefix -> name -> polarity``.
    """
    grounded = []  # pairs of (Entity, polarity)
    free = []  # triples of (prefix, name, polarity)

    for category, names in data[ANNOTATIONS].items():
        if category == 'CellLine':
            efo_name_to_id = get_name_id_mapping('efo')
            # FIXME implement CLO import and fall back to it when EFO has no match
            for name, polarity in names.items():
                identifier = efo_name_to_id.get(name)
                if identifier is not None:
                    grounded.append((Entity(namespace='efo', identifier=identifier, name=name), polarity))
                else:
                    # Record the name: the identifier is None in this branch, so
                    # storing it would collapse every miss onto a single None key.
                    free.append(('efo', name, polarity))
            continue

        if category in _BEL_ANNOTATION_PREFIX_MAP:
            _process_findable_annotations(grounded, free, _BEL_ANNOTATION_PREFIX_MAP[category], names)
            continue

        prefix_norm = normalize_prefix(category)
        if prefix_norm:
            _process_findable_annotations(grounded, free, prefix_norm, names)
            continue

        # Nothing knows how to ground this category: warn once and keep it free.
        if category not in _UNHANDLED_ANNOTATION:
            logger.warning('unhandled annotation: %s', category)
            _UNHANDLED_ANNOTATION.add(category)

        if isinstance(names, dict):
            for name, polarity in names.items():
                free.append((category, name, polarity))
        else:
            # A bare value (not a name -> polarity mapping) is treated as positive.
            free.append((category, names, True))

    data[ANNOTATIONS] = defaultdict(dict)
    for entity, polarity in grounded:
        data[ANNOTATIONS][entity.namespace][entity.identifier] = polarity

    if add_free_annotations:
        data['free_annotations'] = defaultdict(dict)
        for prefix, name, polarity in free:
            data['free_annotations'][prefix][name] = polarity
Пример #4
0
def get_remapping() -> Mapping[Tuple[str, str], Tuple[str, str, str]]:
    """Build a lookup from normalized (prefix, identifier) pairs to FamPlex entries.

    Each row of the data frame returned by ``_get_df`` is read as
    ``(target namespace, target identifier, source identifier)``. Rows whose
    namespace can not be normalized are logged at debug level and skipped.

    :return: A dictionary keyed by ``(normalized prefix, target identifier)``
        whose values are ``('fplx', source identifier, source identifier)``.
    """
    remapping = {}
    for row in _get_df().values:
        target_ns, target_id, source_id = row
        prefix = normalize_prefix(target_ns)
        if prefix is None:
            logger.debug('could not remap %s', target_ns)
            continue
        remapping[prefix, target_id] = ('fplx', source_id, source_id)
    return remapping
Пример #5
0
def _process_concept(
        *,
        concept,
        node=None,
        skip_namespaces: Optional[Collection[str]] = None) -> bool:
    """Normalize a concept dictionary's namespace and remap its contents in place.

    :param concept: A mapping holding at least the ``NAMESPACE`` key and
        optionally the ``IDENTIFIER`` and ``NAME`` keys. It is modified in place.
    :param node: Forwarded untouched to ``_handle_name_and_not_identifier``.
    :param skip_namespaces: Namespaces (compared against the raw,
        un-normalized value) for which the concept is left untouched and
        reported as successful. Also forwarded to both handlers.
    :return: ``False`` if the namespace is ``text``/``fixme`` (case-insensitive),
        if the namespace can not be normalized, or if no handler succeeds.
        ``True`` otherwise, including when the namespace is skipped.
    """
    raw_namespace = concept[NAMESPACE]
    if raw_namespace.lower() in {'text', 'fixme'}:
        return False

    if skip_namespaces and raw_namespace in skip_namespaces:
        return True

    prefix = normalize_prefix(raw_namespace)
    if prefix is None:
        logger.warning('could not normalize namespace "%s" in concept "%s"',
                       raw_namespace, concept)
        return False

    # From here on the concept carries the normalized prefix.
    concept[NAMESPACE] = prefix

    identifier = concept.get(IDENTIFIER)
    if not identifier:
        handled = _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=concept.get(NAME),
            node=node,
            skip_namespaces=skip_namespaces,
        )
    else:
        # An identifier takes precedence over the name. If it can not be
        # handled as an identifier, retry treating it as a name, in case the
        # name was put into the identifier slot.
        handled = _handle_identifier_not_name(
            concept=concept,
            prefix=prefix,
            identifier=identifier,
            skip_namespaces=skip_namespaces,
        ) or _handle_name_and_not_identifier(
            concept=concept,
            prefix=prefix,
            name=identifier,
            node=node,
            skip_namespaces=skip_namespaces,
        )

    if handled:
        _remap_by_identifier(concept)
        return True
    return False
Пример #6
0
def _process_annotations(data, remove_ungrounded: bool = False) -> None:
    """Process the annotations in a PyBEL edge data dictionary.

    Every annotation category found under ``data[ANNOTATIONS]`` is handled by
    the first rule that matches:

    1. ``CellLine`` names are grounded against a fixed list of prefixes.
    2. Categories listed in ``_BEL_ANNOTATION_PREFIX_MAP`` are grounded against
       the mapped prefix and stored under the category given by
       ``_BEL_ANNOTATION_PREFIX_CATEGORY_MAP`` (falling back to the original).
    3. Categories that :func:`normalize_prefix` accepts are grounded against
       the normalized prefix.
    4. Anything else is logged (once per category) and kept ungrounded.

    Afterwards ``data[ANNOTATIONS]`` is replaced with a plain dictionary
    ``category -> CURIE -> polarity`` that only holds grounded entries.

    :param data: The edge data dictionary; it is modified in place.
    :param remove_ungrounded: If false and at least one annotation could not
        be grounded, the ungrounded ones are stored under
        ``data[FREE_ANNOTATIONS]`` as ``category -> name -> polarity``.
    """
    grounded = []  # triples of (category, Entity, polarity)
    ungrounded = []  # triples of (category, name, polarity)

    def _ground_names(prefix, target_category, name_to_polarity):
        """Ground each name against ``prefix`` and file it under ``target_category``."""
        for name, polarity in name_to_polarity.items():
            _, identifier, _ = pyobo.ground(prefix, name)
            if identifier:
                entity = Entity(namespace=prefix, identifier=identifier, name=name)
                grounded.append((target_category, entity, polarity))
            else:
                ungrounded.append((target_category, name, polarity))

    for category, names in data[ANNOTATIONS].items():
        if category == 'CellLine':
            cell_line_prefixes = [
                'efo',
                # 'clo',  # FIXME implement CLO import and add here
            ]
            for name, polarity in names.items():
                g_prefix, g_identifier, g_name = pyobo.ground(cell_line_prefixes, name)
                if g_prefix and g_identifier:
                    # Unlike the other branches, the name returned by the
                    # grounder is used here instead of the annotated one.
                    entity = Entity(namespace=g_prefix, identifier=g_identifier, name=g_name)
                    grounded.append((category, entity, polarity))
                else:
                    ungrounded.append((category, name, polarity))
            continue

        if category in _BEL_ANNOTATION_PREFIX_MAP:
            _ground_names(
                _BEL_ANNOTATION_PREFIX_MAP[category],
                _BEL_ANNOTATION_PREFIX_CATEGORY_MAP.get(category, category),
                names,
            )
            continue

        # Normalize once and reuse the result for both the check and the lookup.
        norm_prefix = normalize_prefix(category)
        if norm_prefix:
            _ground_names(norm_prefix, category, names)
            continue

        # Nothing knows how to ground this category: warn once and keep it ungrounded.
        if category not in _UNHANDLED_ANNOTATION:
            logger.warning('unhandled annotation: %s', category)
            _UNHANDLED_ANNOTATION.add(category)

        if isinstance(names, dict):
            for name, polarity in names.items():
                ungrounded.append((category, name, polarity))
        else:
            # A bare value (not a name -> polarity mapping) is treated as positive.
            ungrounded.append((category, names, True))

    annotations = {}
    for category, entity, polarity in grounded:
        annotations.setdefault(category, {})[entity.curie] = polarity
    data[ANNOTATIONS] = annotations

    if not remove_ungrounded and ungrounded:
        free_annotations = {}
        for category, name, polarity in ungrounded:
            free_annotations.setdefault(category, {})[name] = polarity
        data[FREE_ANNOTATIONS] = free_annotations
Пример #7
0
def summarize_one(prefix: str):
    """Summarize the mappings."""
    prefix = normalize_prefix(prefix)
    in_df = summary_df.loc[summary_df['target_ns'] == prefix, ['source_ns', 'count']]
    out_df = summary_df.loc[summary_df['source_ns'] == prefix, ['target_ns', 'count']]
    return f'''