Пример #1
0
def get_concept_categories():
    """Summarise the concept categories found on the nodes of the graph.

    Every distinct ``category`` property value is fetched together with the
    number of nodes carrying it.  Each value is passed through
    ``utils.standardize`` and the node counts are summed per standardized
    category name.

    :return: one ``BeaconConceptCategory`` per standardized category, ordered
        from the most to the least frequent
    :rtype: List[BeaconConceptCategory]
    """
    q = 'MATCH (x) RETURN DISTINCT x.category AS category, COUNT(*) AS frequency;'

    totals = {}
    for row in db.query(q):
        for name in utils.standardize(row['category']):
            totals[name] = totals.get(name, 0) + row['frequency']

    # Most frequent first; the sort is stable, so ties keep insertion order.
    ranked = list(totals.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    return [
        BeaconConceptCategory(
            uri='http://bioentity.io/vocab/{}'.format(camel_case(name)),
            id='BLM:{}'.format(camel_case(name)),
            frequency=count,
            category=name,
        )
        for name, count in ranked
    ]
Пример #2
0
def get_concepts(keywords, categories=None, size=None):
    """Search the graph for concepts by name keyword and, optionally, category.

    A node matches when at least one of its names contains at least one of the
    keywords (compared case-insensitively) and, if ``categories`` is
    non-empty, at least one of its categories equals one of the requested
    categories (also case-insensitively).

    :param keywords: keywords or substrings to look for in node names
    :param categories: categories to restrict the search to; ``None`` is
        treated as an empty list, which disables the category filter
    :param size: maximum number of concepts to return; ``None`` or a
        non-positive value falls back to 100
    :return: the matching concepts
    :rtype: List[BeaconConcept]
    """
    if size is None or not size > 0:
        size = 100
    if categories is None:
        categories = []

    q = """
    MATCH (n)
    WHERE
        (ANY (keyword IN {keywords} WHERE
            (ANY (name IN n.name WHERE LOWER(name) CONTAINS LOWER(keyword))))) AND
        (SIZE({categories}) = 0 OR
            ANY (category IN {categories} WHERE
            (ANY (name IN n.category WHERE LOWER(name) = LOWER(category)))))
    RETURN n
    LIMIT {limit}
    """

    matches = db.query(q, Node, keywords=keywords, categories=categories, limit=size)

    concepts = []
    for node in matches:
        # When every element of the category is a single character, glue the
        # pieces back together into one category string (presumably a string
        # that was split into characters upstream -- TODO confirm).
        if all(len(piece) == 1 for piece in node.category):
            node.category = [''.join(node.category)]

        concepts.append(BeaconConcept(
            id=node.curie,
            name=node.name,
            categories=utils.standardize(node.category),
            description=node.description,
        ))

    return concepts
Пример #3
0
def get_concept_details(concept_id):  # noqa: E501
    """Retrieve the details of a single concept.

    The concept is looked up by a case-insensitive comparison of its ``id``
    property with ``concept_id``; at most one node is considered.

    :param concept_id: (url-encoded) CURIE identifier of the concept of interest
    :type concept_id: str

    :return: the populated details of the first matching node, or an empty
        ``BeaconConceptWithDetails`` when the query yields no row
    :rtype: BeaconConceptWithDetails
    """
    q = """
    MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId})
    RETURN
        n.id AS id,
        n.uri AS uri,
        n.iri AS iri,
        n.name AS name,
        n.category AS category,
        n.symbol AS symbol,
        n.description AS description,
        n.synonym AS synonyms,
        n.clique AS clique,
        n.xrefs AS xrefs,
        n AS node
    LIMIT 1
    """

    # The loop body always returns, so only the first row is ever processed.
    for row in db.query(q, conceptId=concept_id):
        # Fall back to the ``iri`` property when the node has no ``uri``.
        uri = row['iri'] if row['uri'] is None else row['uri']

        # Exact matches are the clique plus the xrefs, minus the concept itself.
        exact_matches = utils.remove_all(
            utils.listify(row['clique']) + utils.listify(row['xrefs']),
            row['id'])

        details = [
            BeaconConceptDetail(tag=tag, value=utils.stringify(value))
            for tag, value in create_details_dict(row['node']).items()
        ]

        return BeaconConceptWithDetails(
            id=row['id'],
            uri=utils.stringify(uri),
            name=utils.stringify(row['name']),
            categories=utils.standardize(row['category']),
            symbol=utils.stringify(row['symbol']),
            description=utils.stringify(row['description']),
            synonyms=utils.listify(row['synonyms']),
            exact_matches=exact_matches,
            details=details,
        )

    return BeaconConceptWithDetails()
def get_namespaces():  # noqa: E501
    """List the namespaces (CURIE prefixes) this beacon can map between.

    For every prefix used by node ids, collects the distinct prefixes found in
    the ``xrefs`` and ``clique`` entries of those nodes (entries equal to the
    node's own id are ignored) together with a row count.

    :return: one ``LocalNamespace`` per local prefix, each carrying the
        ``Namespace`` objects it maps to
    :rtype: List[LocalNamespace]
    """
    q = """
    MATCH (n)
    WITH
        split(n.id, ":")[0] AS prefix,
        FILTER(x IN n.xrefs WHERE x <> n.id) AS xrefs,
        FILTER(x IN n.clique WHERE x <> n.id) AS clique
    WITH
        prefix AS prefix,
        EXTRACT(id IN xrefs | split(id, ":")[0]) AS xref_prefixes,
        EXTRACT(id IN clique | split(id, ":")[0]) AS clique_prefixes
    UNWIND
        COALESCE(xref_prefixes, []) + COALESCE(clique_prefixes, []) As p
    RETURN DISTINCT prefix AS local_prefix, COLLECT(DISTINCT p) AS clique_prefixes, COUNT(*) AS frequency;
    """

    local_namespaces = []
    for row in db.query(q):
        local_prefix = row.get('local_prefix')
        mapped_prefixes = row.get('clique_prefixes')
        frequency = row.get('frequency')

        mappings = [
            Namespace(prefix=mapped, uri=prefix_to_uri(mapped))
            for mapped in mapped_prefixes
        ]

        local_namespaces.append(LocalNamespace(
            local_prefix=local_prefix,
            clique_mappings=mappings,
            frequency=frequency,
            uri=prefix_to_uri(local_prefix),
        ))

    return local_namespaces
Пример #5
0
def get_predicates():
    """List the predicates used by the relationships of the graph.

    Each distinct combination of relationship type and ``relation`` property
    is returned with the number of relationships carrying it.

    :return: the predicates, ordered from the most to the least frequent
    :rtype: List[BeaconPredicate]
    """
    q = """
    MATCH (x)-[r]->(y)
    RETURN DISTINCT type(r) AS predicate, r.relation AS relation, COUNT(*) AS frequency;
    """

    rows = list(db.query(q))
    rows.sort(key=lambda row: row['frequency'], reverse=True)

    return [
        BeaconPredicate(
            id=row['relation'],
            edge_label=row['predicate'],
            frequency=row['frequency'],
        )
        for row in rows
    ]
Пример #6
0
def get_concept_details(conceptId):
    """Retrieve the details of a single concept.

    The concept is looked up by a case-insensitive comparison of its ``id``
    property with ``conceptId``; at most one node is considered.

    :param conceptId: CURIE identifier of the concept of interest
    :return: the details of the first matching node, or ``None`` when the
        query yields no row
    :rtype: BeaconConceptWithDetails
    """
    q = """
    MATCH (n) WHERE LOWER(n.id)=LOWER({conceptId})
    RETURN
        n.id AS id,
        n.uri AS uri,
        n.iri AS iri,
        n.name AS name,
        n.category AS category,
        n.symbol AS symbol,
        n.description AS description,
        n.synonym AS synonyms,
        n.clique AS clique,
        n.xrefs AS xrefs
    LIMIT 1
    """

    results = db.query(q, conceptId=conceptId)

    # The loop body always returns, so only the first row is ever processed.
    for result in results:
        # Fall back to the ``iri`` property when the node has no ``uri``.
        uri = result['uri'] if result['uri'] is not None else result['iri']

        # Missing list properties come back as None; normalise them to [].
        synonyms = result['synonyms'] if result['synonyms'] is not None else []
        clique = result['clique'] if result['clique'] is not None else []
        xrefs = result['xrefs'] if result['xrefs'] is not None else []

        # De-duplicated clique + xrefs, without the concept's own id.
        exact_matches = utils.remove_all(list(set(clique + xrefs)),
                                         result['id'])

        return BeaconConceptWithDetails(
            id=result['id'],
            uri=uri,
            name=result['name'],
            categories=utils.standardize(result['category']),
            symbol=result['symbol'],
            description=result['description'],
            # Use the normalised list: the raw value may be None.
            synonyms=synonyms,
            exact_matches=exact_matches
        )

    return None
Пример #7
0
def get_knowledge_map():
    """Build the knowledge map of the graph.

    Each distinct combination of subject category, relationship type,
    ``relation``, object category and ``negated`` flag is fetched with its
    number of occurrences.  The rows are then post-processed by
    ``split_up_categories`` and ``add_up_duplicates`` and ordered from the
    most to the least frequent.  Rows whose subject or object category is
    still a list after that processing are left out.

    :return: one statement per remaining row
    :rtype: List[BeaconKnowledgeMapStatement]
    """
    q = """
    MATCH (x)-[r]->(y)
    RETURN DISTINCT
        x.category AS subject_category,
        type(r) AS edge_label,
        r.relation AS relation,
        y.category AS object_category,
        r.negated AS negated,
        COUNT(*) AS frequency;
    """

    rows = split_up_categories(db.query(q))
    add_up_duplicates(rows)
    rows = sorted(rows, key=lambda row: row['frequency'], reverse=True)

    statements = []
    for row in rows:
        # Skip rows whose categories are still lists.
        if (isinstance(row['subject_category'], list)
                or isinstance(row['object_category'], list)):
            continue

        map_object = BeaconKnowledgeMapObject(
            category=row['object_category'], prefixes=[])
        map_predicate = BeaconKnowledgeMapPredicate(
            edge_label=row['edge_label'],
            relation=row['relation'],
            negated=bool(row['negated']))
        map_subject = BeaconKnowledgeMapSubject(
            category=row['subject_category'], prefixes=[])

        statements.append(BeaconKnowledgeMapStatement(
            subject=map_subject,
            predicate=map_predicate,
            object=map_object,
            frequency=row['frequency']))

    return statements
Пример #8
0
def get_exact_matches_to_concept_list(c):
    """Report the exact matches known for each of the given concept ids.

    Nodes are matched against the input ids case-insensitively.  For every
    matching node a response flagged ``within_domain=True`` is produced whose
    exact matches are the node's ``xrefs`` and ``clique`` entries
    (de-duplicated, without the node's own id).  Every input id that matched
    no node gets a response flagged ``within_domain=False`` with no matches.

    The input sequence is not modified.

    :param c: CURIE identifiers to look up
    :type c: List[str]
    :return: responses for the matched nodes first, followed by the responses
        for the unmatched input ids in input order
    :rtype: List[ExactMatchResponse]
    """
    q = """
    MATCH (n) WHERE
        ANY(id IN {id_list} WHERE TOLOWER(n.id) = TOLOWER(id))
    RETURN
        n.id AS id,
        n.xrefs AS xrefs,
        n.clique AS clique
    """

    requested = list(c)
    results = db.query(q, id_list=requested)

    exact_match_responses = []
    # Lower-cased ids of the nodes that were found.  The query matches ids
    # case-insensitively, so the bookkeeping must be case-insensitive too;
    # removing the stored id from the input list would fail whenever the
    # stored case differs from the requested one.
    found = set()

    for result in results:
        node_id = result['id']
        found.add(str(node_id).lower())

        exact_matches = []
        for related in (result['xrefs'], result['clique']):
            # Properties that are missing or scalar are ignored.
            if isinstance(related, (list, tuple, set)):
                exact_matches += related

        exact_matches = utils.remove_all(exact_matches, node_id)

        exact_match_responses.append(ExactMatchResponse(
            id=node_id,
            within_domain=True,
            has_exact_matches=list(set(exact_matches))
        ))

    for curie_id in requested:
        if str(curie_id).lower() in found:
            continue
        exact_match_responses.append(ExactMatchResponse(
            id=curie_id,
            within_domain=False,
            has_exact_matches=[]
        ))

    return exact_match_responses
Пример #9
0
def prefix_map():
    """
    Returns a dictionary that maps lowercase prefixes to the case of prefixes
    as they appear in the database. Can be used to correct the case of an
    identifier.

    A warning is logged when two prefixes in the database differ only by
    case; the one seen last wins. A prefix that is not a string is mapped to
    itself.
    """
    from beacon_controller import database as db
    q="MATCH (x) RETURN  DISTINCT split(x.id, ':')[0] AS prefix"
    results = db.query(q)

    d = {}
    for result in results:
        prefix = result['prefix']
        key = prefix.lower() if isinstance(prefix, str) else prefix

        # Compare on the normalised key: the dictionary is keyed by lowercase
        # prefixes, so checking the raw prefix would miss case collisions.
        if key in d:
            logger.warning(
                'Identifier prefix %s appears in the database with multiple cases',
                prefix)

        d[key] = prefix

    return d
Пример #10
0
def get_statements(s,
                   edge_label=None,
                   relation=None,
                   t=None,
                   keywords=None,
                   categories=None,
                   size=None):
    """Retrieve the statements that involve any of the given source concepts.

    Relationships are matched in either direction.  Each returned statement
    is oriented so that its subject is the start node of the relationship.

    :param s: CURIE identifiers of the source concepts (matched
        case-insensitively)
    :param edge_label: if given, only relationships of this type are returned
    :param relation: if given, only relationships whose ``relation`` property
        equals this value are returned
    :param t: if given, CURIE identifiers that the node at the other end of
        the relationship must match (case-insensitively)
    :param keywords: forwarded to the query as a parameter, but the query does
        not filter on it
    :param categories: forwarded to the query as a parameter, but the query
        does not filter on it
    :param size: maximum number of statements; ``None`` or a value below 1
        falls back to 100
    :rtype: List[BeaconStatement]
    """
    if size is None or size < 1:
        size = 100

    q = """
    MATCH (n)-[r]-(m)
    WHERE
        ANY(id IN {sources} WHERE TOLOWER(n.id) = TOLOWER(id)) AND
        ({targets} IS NULL OR ANY(id IN {targets} WHERE TOLOWER(m.id) = TOLOWER(id))) AND
        ({edge_label} IS NULL OR type(r) = {edge_label}) AND
        ({relation} IS NULL OR r.relation = {relation})
    RETURN
        n AS source,
        m AS target,
        EXISTS((n)-[r]->(m)) AS source_is_subject,
        type(r) AS type,
        r.edge_label AS edge_label,
        r.relation AS relation,
        r.negated AS negated,
        r.id AS statement_id
    LIMIT {limit}
    """

    results = db.query(q,
                       sources=s,
                       targets=t,
                       edge_label=edge_label,
                       relation=relation,
                       keywords=keywords,
                       categories=categories,
                       limit=size)

    statements = []

    for result in results:
        # The match is undirected: orient the pair along the relationship.
        if result['source_is_subject']:
            subject, obj = result['source'], result['target']
        else:
            obj, subject = result['source'], result['target']

        # Prefer the explicit edge_label property over the relationship type.
        if result['edge_label'] is not None:
            label = result['edge_label']
        else:
            label = result['type']

        beacon_subject = BeaconStatementSubject(
            id=subject['id'],
            name=subject['name'],
            categories=utils.standardize(subject['category']))

        beacon_predicate = BeaconStatementPredicate(
            edge_label=label,
            relation=result['relation'],
            negated=result['negated'])

        beacon_object = BeaconStatementObject(
            id=obj['id'],
            name=obj['name'],
            categories=utils.standardize(obj['category']))

        # Relationships without an id get a synthetic subject:label:object id.
        statement_id = result['statement_id']
        if statement_id is None:
            statement_id = '{}:{}:{}'.format(subject['id'], label, obj['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements
Пример #11
0
def get_statement_details(statementId, keywords=None, size=None):
    """Retrieve the details of a single statement.

    ``statementId`` is split on ``:``.  Two components are treated as the
    ``id`` property of a relationship; five components are treated as
    ``subject_curie:edge_label:object_curie``, in which case the relationship
    between the two nodes is matched case-insensitively on its type or on its
    ``edge_label`` property.

    :param statementId: identifier of the statement
    :param keywords: accepted but not used by this implementation
    :param size: accepted but not used by this implementation
    :return: the details of the first matching relationship, or ``None`` when
        nothing matches
    :rtype: BeaconStatementWithDetails
    :raises Exception: if ``statementId`` has neither two nor five components
    """
    statement_components = statementId.split(':')

    if len(statement_components) == 2:
        q = """
        MATCH (s)-[r {id: {statement_id}}]-(o)
        RETURN s AS subject, r AS relation, o AS object
        LIMIT 1;
        """
        results = db.query(q, statement_id=statementId)

    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = """
        MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}})
        WHERE
            TOLOWER(type(r)) = TOLOWER({edge_label}) OR
            TOLOWER(r.edge_label) = TOLOWER({edge_label})
        RETURN
            s AS subject,
            r AS relation,
            o AS object
        LIMIT 1;
        """
        results = db.query(q,
                           subject_id=subject_id,
                           object_id=object_id,
                           edge_label=edge_label)
    else:
        raise Exception(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statementId))

    # The loop body always returns, so only the first row is ever processed.
    for result in results:
        d = {}
        s = result['subject']
        r = result['relation']
        o = result['object']

        d['relationship_type'] = r.type

        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []
        for key in ('evidence', 'publications'):
            if key not in r:
                continue
            uris = r[key]
            # A property holding a single string must be treated as one URI,
            # not iterated character by character.
            if isinstance(uris, str):
                uris = [uris]
            for uri in uris:
                evidences.append(BeaconStatementCitation(uri=uri))

        annotations = []
        for key, value in d.items():
            annotations.append(
                BeaconStatementAnnotation(tag=key,
                                          value=utils.stringify(value)))

        return BeaconStatementWithDetails(
            id=statementId,
            is_defined_by=utils.stringify(r.get('is_defined_by', None)),
            provided_by=utils.stringify(r.get('provided_by', None)),
            qualifiers=r.get('qualifiers', None),
            annotation=annotations,
            evidence=evidences)

    return None
def get_statement_details(statement_id,
                          keywords=None,
                          offset=None,
                          size=None):  # noqa: E501
    """get_statement_details

    Retrieves the details relating to a specified concept-relationship
    statement: 'is_defined_by' and 'provided_by' provenance; extended edge
    properties exported as tag = value; and any associated annotations
    (publications, etc.) cited as evidence for the given statement.

    ``statement_id`` is split on ``:``.  Two components are treated as the
    ``id`` property of a relationship; five components are treated as
    ``subject_curie:edge_label:object_curie``, in which case the relationship
    between the two nodes is matched case-insensitively on its type or on its
    ``edge_label`` property.

    :param statement_id: (url-encoded) CURIE identifier of the concept-relationship statement ("assertion", "claim") for which associated evidence is sought
    :type statement_id: str
    :param keywords: an array of keywords or substrings against which to filter annotation names (e.g. publication titles); accepted but not used by this implementation
    :type keywords: List[str]
    :param offset: offset (cursor position) to next batch of annotation entries of amount 'size' to return; accepted but not used by this implementation
    :type offset: int
    :param size: maximum number of evidence citation entries requested by the client; accepted but not used by this implementation
    :type size: int

    :return: the details of the first matching relationship, or ``None`` when
        nothing matches
    :rtype: BeaconStatementWithDetails
    :raises Exception: if ``statement_id`` has neither two nor five components
    """
    statement_components = statement_id.split(':')

    if len(statement_components) == 2:
        q = """
        MATCH (s)-[r {id: {statement_id}}]-(o)
        RETURN s AS subject, r AS relation, o AS object
        LIMIT 1;
        """
        results = db.query(q, statement_id=statement_id)

    elif len(statement_components) == 5:
        s_prefix, s_num, edge_label, o_prefix, o_num = statement_components
        subject_id = '{}:{}'.format(s_prefix, s_num)
        object_id = '{}:{}'.format(o_prefix, o_num)
        q = """
        MATCH (s {id: {subject_id}})-[r]-(o {id: {object_id}})
        WHERE
            TOLOWER(type(r)) = TOLOWER({edge_label}) OR
            TOLOWER(r.edge_label) = TOLOWER({edge_label})
        RETURN
            s AS subject,
            r AS relation,
            o AS object
        LIMIT 1;
        """
        results = db.query(q,
                           subject_id=subject_id,
                           object_id=object_id,
                           edge_label=edge_label)
    else:
        raise Exception(
            '{} must either be a curie, or curie:edge_label:curie'.format(
                statement_id))

    # The loop body always returns, so only the first row is ever processed.
    for result in results:
        d = {}
        s = result['subject']
        r = result['relation']
        o = result['object']

        d['relationship_type'] = r.type

        populate_dict(d, s, 'subject')
        populate_dict(d, o, 'object')
        populate_dict(d, r)

        evidences = []
        if 'evidence' in r:
            evidence = r['evidence']
            # Same treatment as publications below: a property that is not a
            # list is a single entry, not something to iterate over.
            if not isinstance(evidence, list):
                evidence = [evidence]
            for uri in evidence:
                evidences.append(
                    BeaconStatementCitation(uri=utils.stringify(uri)))
        if 'publications' in r:
            publications = r['publications']
            if not isinstance(publications, list):
                publications = [publications]
            for publication in publications:
                evidences.append(build_evidence(publication))

        annotations = []
        for key, value in d.items():
            annotations.append(
                BeaconStatementAnnotation(tag=key,
                                          value=utils.stringify(value)))

        return BeaconStatementWithDetails(
            id=statement_id,
            is_defined_by=utils.stringify(r.get('is_defined_by', None)),
            provided_by=utils.stringify(r.get('provided_by', None)),
            qualifiers=r.get('qualifiers', None),
            annotation=annotations,
            evidence=evidences)

    return None
def get_statements(s=None,
                   s_keywords=None,
                   s_categories=None,
                   edge_label=None,
                   relation=None,
                   t=None,
                   t_keywords=None,
                   t_categories=None,
                   offset=None,
                   size=None):  # noqa: E501
    """get_statements

    Retrieves relationship statements, optionally constrained by
    [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers, keywords and
    categories of the 's' ('source') and 't' ('target') concepts and by the
    predicate.  In this implementation the query pattern is directed: the
    's' constraints apply to the start node of a relationship (the statement
    subject) and the 't' constraints apply to its end node (the statement
    object).  All supplied constraints must hold together.

    :param s: An (optional) array set of CURIE identifiers of 'source' concepts; compared case-insensitively with the node id. Unknown CURIEs simply match nothing.
    :type s: List[str]
    :param s_keywords: An (optional) array of keywords or substrings; a 'source' concept matches when its name or one of its synonyms contains a keyword (case-insensitive)
    :type s_keywords: List[str]
    :param s_categories: An (optional) array set of 'source' concept categories (see [Biolink Model](https://biolink.github.io/biolink-model)); lower-cased and compared with the node labels
    :type s_categories: List[str]
    :param edge_label: (Optional) predicate edge label; compared with the relationship type
    :type edge_label: str
    :param relation: (Optional) predicate relation; compared with the relationship's ``relation`` property
    :type relation: str
    :param t: An (optional) array set of CURIE identifiers of 'target' concepts; compared case-insensitively with the node id. Unknown CURIEs simply match nothing.
    :type t: List[str]
    :param t_keywords: An (optional) array of keywords or substrings; a 'target' concept matches when its name or one of its synonyms contains a keyword (case-insensitive)
    :type t_keywords: List[str]
    :param t_categories: An (optional) array set of 'target' concept categories; lower-cased and compared with the node labels
    :type t_categories: List[str]
    :param offset: offset (cursor position) of the first statement to return; applied only when it is a non-negative int
    :type offset: int
    :param size: maximum number of statements to return; defaults to 100 when omitted and is applied only when it is an int of at least 1
    :type size: int

    :rtype: List[BeaconStatement]
    """
    if size is None:
        size = 100

    conjuncts = []  # WHERE conditions, AND-ed together
    unwinds = []    # list parameters expanded to one row per element
    data = {}       # query parameters actually referenced by the query

    if s is not None:
        unwinds.append("[x IN {sources} | toLower(x)] AS s")
        conjuncts.append("toLower(n.id) = s")
        data['sources'] = s

    if t is not None:
        unwinds.append("[x IN {targets} | toLower(x)] AS t")
        conjuncts.append("toLower(m.id) = t")
        data['targets'] = t

    if s_keywords is not None:
        unwinds.append("[x IN {s_keywords} | toLower(x)] AS s_keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS s_keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS s_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['s_keywords'] = s_keywords

    if t_keywords is not None:
        unwinds.append("[x IN {t_keywords} | toLower(x)] AS t_keyword")
        disjuncts = [
            "toLower(m.name) CONTAINS t_keyword",
            "ANY(syn IN m.synonym WHERE toLower(syn) CONTAINS t_keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['t_keywords'] = t_keywords

    if edge_label is not None:
        conjuncts.append("type(r) = {edge_label}")
        data['edge_label'] = edge_label

    if relation is not None:
        conjuncts.append("r.relation = {relation}")
        data['relation'] = relation

    if s_categories is not None:
        unwinds.append("[x IN {s_categories} | toLower(x)] AS s_category")
        # No trailing parenthesis here: each conjunct is wrapped in its own
        # pair of parentheses when the WHERE clause is assembled below.
        conjuncts.append("s_category IN labels(n)")
        data['s_categories'] = s_categories

    if t_categories is not None:
        unwinds.append("[x IN {t_categories} | toLower(x)] AS t_category")
        conjuncts.append("t_category IN labels(m)")
        data['t_categories'] = t_categories

    q = "MATCH (n)-[r]->(m)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += """
    RETURN
        n AS subject,
        m AS object,
        type(r) AS edge_type,
        r.edge_label AS edge_label,
        r.relation AS relation,
        r.negated AS negated,
        r.id AS statement_id
    """

    # Only genuine ints are interpolated into the query text.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    results = db.query(q, **data)

    statements = []

    for result in results:
        subject, obj = result['subject'], result['object']

        # Prefer the explicit edge_label property over the relationship type.
        if result['edge_label'] is not None:
            label = utils.stringify(result['edge_label'])
        else:
            label = utils.stringify(result['edge_type'])

        beacon_subject = BeaconStatementSubject(
            id=subject['id'],
            name=utils.stringify(subject['name']),
            categories=utils.standardize(subject['category']))

        beacon_predicate = BeaconStatementPredicate(
            edge_label=label,
            relation=utils.stringify(result['relation']),
            negated=bool(result['negated']))

        beacon_object = BeaconStatementObject(
            id=obj['id'],
            name=utils.stringify(obj['name']),
            categories=utils.standardize(obj['category']))

        # Relationships without an id get a synthetic subject:label:object id.
        statement_id = result['statement_id']
        if statement_id is None:
            statement_id = '{}:{}:{}'.format(subject['id'], label, obj['id'])

        statements.append(
            BeaconStatement(id=statement_id,
                            subject=beacon_subject,
                            predicate=beacon_predicate,
                            object=beacon_object))

    return statements
Пример #14
0
def get_concepts(keywords=None, categories=None, offset=None, size=None):  # noqa: E501
    """get_concepts

    Retrieves a list of concepts in the beacon knowledge base whose names
    and/or synonyms match a set of keywords or substrings, optionally
    restricted to a set of categories.  This implementation does not rank the
    results by match quality.

    :param keywords: (Optional) array of keywords or substrings; a concept matches when its name or one of its synonyms contains a keyword (case-insensitive)
    :type keywords: List[str]
    :param categories: (Optional) array set of concept categories - specified as Biolink name labels codes gene, pathway, etc. - to which to constrain the matched concepts (see [Biolink Model](https://biolink.github.io/biolink-model) for the full list of terms); a concept matches when one of them is among its node labels
    :type categories: List[str]
    :param offset: offset (cursor position) of the first concept to return; applied only when it is a non-negative int
    :type offset: int
    :param size: maximum number of concept entries requested by the client; defaults to 100 when omitted and is applied only when it is an int of at least 1
    :type size: int

    :rtype: List[BeaconConcept]
    """
    if size is None:
        size = 100

    conjuncts = []  # WHERE conditions, AND-ed together
    unwinds = []    # list parameters expanded to one row per element
    data = {}       # query parameters actually referenced by the query

    if keywords is not None:
        unwinds.append("[x IN {keywords} | toLower(x)] AS keyword")
        disjuncts = [
            "toLower(n.name) CONTAINS keyword",
            "ANY(syn IN n.synonym WHERE toLower(syn) CONTAINS keyword)"
        ]
        conjuncts.append(" OR ".join(disjuncts))
        data['keywords'] = keywords

    if categories is not None:
        # The categories are deliberately NOT unwound: the ANY(...) predicate
        # binds its own `category` variable, so an UNWIND would never be used
        # by the filter and would only repeat every matching node once per
        # requested category.
        conjuncts.append("ANY(category IN {categories} WHERE category IN labels(n))")
        data['categories'] = categories

    q = "MATCH (n)"

    if unwinds:
        q = "UNWIND " + ' UNWIND '.join(unwinds) + " " + q

    if conjuncts:
        q = q + " WHERE (" + ') AND ('.join(conjuncts) + ")"

    q += " RETURN n"

    # Only genuine ints are interpolated into the query text.
    if isinstance(offset, int) and offset >= 0:
        q += f' SKIP {offset}'
    if isinstance(size, int) and size >= 1:
        q += f' LIMIT {size}'

    nodes = db.query(q, Node, **data)

    concepts = []

    for node in nodes:
        # When every element of the category is a single character, glue the
        # pieces back together into one category string (presumably a string
        # that was split into characters upstream -- TODO confirm).
        if all(len(category) == 1 for category in node.category):
            node.category = [''.join(node.category)]

        concepts.append(BeaconConcept(
            id=node.curie,
            name=node.name,
            categories=utils.standardize(node.category),
            description=node.description
        ))

    return concepts
Пример #15
0
def get_exact_matches_to_concept_list(c):  # noqa: E501
    """Find the concepts this knowledge source treats as exact matches.

    Every input identifier is first passed through ``utils.fix_curie``.  A
    node is associated with an input identifier when the identifier equals
    the node's ``id`` or appears in the node's ``xrefs`` or ``clique``.  The
    exact matches reported for an input identifier are the ids, clique
    entries and xrefs of all nodes associated with it.

    :param c: an array set of [CURIE-encoded](https://www.w3.org/TR/curie/) identifiers of concepts to look up
    :type c: List[str]

    :return: one response per input identifier, in input order;
        ``within_domain`` is False and the match list empty for identifiers
        with no associated matches
    :rtype: List[ExactMatchResponse]
    """
    c = [utils.fix_curie(curie) for curie in c]

    q = """
    UNWIND {id_list} AS input_id
    MATCH (n) WHERE
        n.id = input_id OR
        input_id IN n.xrefs OR
        input_id IN n.clique
    RETURN
        input_id AS input_id,
        n.id AS match_id,
        n.xrefs AS xrefs,
        n.clique AS clique;
    """

    # input identifier -> set of identifiers reported as its exact matches
    matches_by_input = defaultdict(set)

    for row in db.query(q, id_list=c):
        key = row.get('input_id')

        node_id = row.get('match_id')
        if isinstance(node_id, str):
            matches_by_input[key].add(node_id)

        for field in ('clique', 'xrefs'):
            related = row.get(field)
            if isinstance(related, (list, tuple, set)):
                matches_by_input[key].update(related)

    # The membership test does not create entries in the defaultdict, so an
    # identifier without any match stays outside the domain.
    return [
        ExactMatchResponse(
            id=curie,
            within_domain=True,
            has_exact_matches=list(matches_by_input[curie])
        )
        if curie in matches_by_input else
        ExactMatchResponse(
            id=curie,
            within_domain=False,
            has_exact_matches=[]
        )
        for curie in c
    ]