Python crawl示例，beacon_controller.crawler.crawl Python示例

示例#1

0

显示文件

文件： concepts_controller.py 项目： lhannest/biothings-explorer-beacon

def get_exact_matches_to_concept_list(c):
    """Collect the exact matches reported for each CURIE in ``c``.

    Every accepted CURIE is passed to ``crawler.crawl``. The object IDs of
    the returned associations whose predicate is ``'EquivalentAssociation'``
    are gathered as exact matches, except for objects whose prefix contains
    ``'name'`` (case-insensitively).

    Args:
        c: Iterable of candidate CURIEs. Entries that are not strings, or
            that contain no ``':'``, are silently skipped.

    Returns:
        A list holding one ``ExactMatchResponse`` per accepted CURIE, in
        input order. ``within_domain`` is true when the crawl returned any
        data at all.
    """
    response = []

    for curie in c:
        if not isinstance(curie, str) or ':' not in curie:
            continue

        data = crawler.crawl(curie)
        exact_matches = []

        for associations in data.values():
            for a in associations:
                if a.get('predicate') != 'EquivalentAssociation':
                    continue

                object_id = safe_get(a, 'object', 'id')

                # An association without a usable object CURIE cannot be
                # reported as a match; skip it rather than failing the
                # whole request on the split below.
                if object_id is None or ':' not in object_id:
                    continue

                prefix = object_id.split(':', 1)[0]

                if 'name' not in prefix.lower():
                    exact_matches.append(object_id)

        response.append(
            ExactMatchResponse(id=curie,
                               within_domain=data != {},
                               has_exact_matches=exact_matches))

    return response

示例#2

0

显示文件

def get_statement_details(statementId, keywords=None, offset=None, size=None):
    """Return the details of the statement identified by ``statementId``.

    The ID must have exactly five colon-separated parts, read as
    ``<subject prefix>:<subject local id>:<predicate>:<object prefix>:
    <object local id>``. The subject is crawled and the first association
    is returned whose simplified object ID equals the requested object and
    whose predicate or edge label equals the requested predicate.

    Args:
        statementId: Statement identifier in the five-part form above.
        keywords: Accepted for interface compatibility; not used here.
        offset: Accepted for interface compatibility; not used here.
        size: Accepted for interface compatibility; not used here.

    Returns:
        A ``BeaconStatementWithDetails`` describing the first matching
        association, or an empty ``BeaconStatementWithDetails`` when the ID
        is malformed or nothing matches.
    """
    parts = statementId.split(':')

    if len(parts) != 5:
        return BeaconStatementWithDetails()

    subject_id = '{}:{}'.format(parts[0], parts[1])
    predicate = parts[2]
    object_id = '{}:{}'.format(parts[3], parts[4])

    # Normalise once, up front. Rewriting the requested predicate while
    # iterating would make the comparison depend on which associations
    # happened to be visited earlier.
    normalized_predicate = predicate.replace(' ', '_')

    data = crawler.crawl(subject_id)

    for associations in data.values():
        for a in associations:
            if object_id != simplify_curie(safe_get(a, 'object', 'id')):
                continue

            association_predicate = safe_get(a, 'predicate')
            predicate_match = (association_predicate == predicate
                               or association_predicate == normalized_predicate)

            edge_label = safe_get(a, 'edge', 'label')
            # Labels may arrive as a list; compare against the first entry.
            if isinstance(edge_label, list):
                edge_label = edge_label[0] if edge_label else None
            if edge_label is not None:
                edge_label = edge_label.replace(' ', '_')

            label_match = (edge_label is not None
                           and edge_label == normalized_predicate)

            if not (label_match or predicate_match):
                continue

            provided_by = safe_get(a, 'edge', 'provided_by')
            probability = safe_get(a, 'edge', 'probability')
            is_defined_by = safe_get(a, 'api')
            endpoint = safe_get(a, 'endpoint')

            annotations = []

            if probability is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='probability',
                                              value=probability))
            if association_predicate is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='predicate',
                                              value=association_predicate))
            if endpoint is not None:
                annotations.append(
                    BeaconStatementAnnotation(tag='endpoint',
                                              value=endpoint))
                # The endpoint input is only reported alongside an endpoint.
                annotations.append(
                    BeaconStatementAnnotation(tag='endpoint_input',
                                              value=subject_id))

            return BeaconStatementWithDetails(provided_by=provided_by,
                                              is_defined_by=is_defined_by,
                                              annotation=annotations)

    return BeaconStatementWithDetails()

示例#3

0

显示文件

文件： concepts_controller.py 项目： lhannest/biothings-explorer-beacon

def get_concept_details(conceptId):
    """Build the detailed description of the concept ``conceptId``.

    The concept is crawled, and the objects of its
    ``'EquivalentAssociation'`` and ``'HasDescriptionAssociation'``
    associations are sorted into three buckets by object prefix
    (case-insensitive substring match):

    * prefix contains ``'name'`` or ``'symbol'`` -> a name (local part);
    * prefix contains ``'description'`` -> a description (local part);
    * prefix contains neither ``'name'`` nor ``'description'`` -> an exact
      match (full object ID). A ``'symbol'`` object therefore lands in both
      the names and the exact matches.

    Args:
        conceptId: CURIE of the concept to describe.

    Returns:
        A ``BeaconConceptWithDetails``. It is empty when the crawl returns
        no data. Otherwise the primary name is the longest name of at most
        60 characters when one exists, else the longest name overall; the
        remaining names become synonyms.
    """
    data = crawler.crawl(conceptId)

    if data == {}:
        return BeaconConceptWithDetails()

    names, descriptions, xrefs = [], [], []
    relevant_predicates = ('EquivalentAssociation',
                           'HasDescriptionAssociation')

    for associations in data.values():
        for a in associations:
            if a.get('predicate') not in relevant_predicates:
                continue

            object_id = safe_get(a, 'object', 'id')

            # Skip associations without a usable object CURIE instead of
            # failing on the split below.
            if object_id is None or ':' not in object_id:
                continue

            prefix, local_id = object_id.split(':', 1)
            prefix = prefix.lower()
            is_name = 'name' in prefix
            is_description = 'description' in prefix

            if is_name or 'symbol' in prefix:
                names.append(local_id)
            if is_description:
                descriptions.append(local_id)
            if not is_name and not is_description:
                xrefs.append(object_id)

    # Names of at most 60 characters come first, longest first. The name
    # itself is the final tie-break so the result does not depend on set
    # iteration order.
    names = sorted(set(names), key=lambda n: (len(n) > 60, -len(n), n))

    # partition() tolerates an ID without ':' (the prefix is then the
    # whole ID).
    prefix = conceptId.partition(':')[0]

    c = BeaconConceptWithDetails(id=conceptId,
                                 name=names[0] if names else None,
                                 synonyms=names[1:],
                                 exact_matches=xrefs,
                                 categories=[lookup_category(prefix)],
                                 description='; '.join(descriptions))

    return c

示例#4

0

显示文件

def _object_display_name(association):
    """Return a display name for the association's object, or None."""
    name = safe_get(association, 'object', 'label')
    if name is not None:
        return name

    secondary_id = safe_get(association, 'object', 'secondary-id')
    if secondary_id is None or ':' not in secondary_id:
        return None

    secondary_prefix, symbol = secondary_id.split(':', 1)
    name = symbol

    # Symbols are annotated with the taxonomy entries that are not CURIEs.
    if 'symbol' in secondary_prefix.lower():
        taxonomy = safe_get(association, 'object', 'taxonomy')
        if taxonomy is not None:
            taxonomy = ', '.join(entry for entry in taxonomy
                                 if ':' not in entry)
            name += ' (taxonomy: {})'.format(taxonomy)

    return name


def _predicate_display_name(association):
    """Return the underscore-separated predicate name of an association."""
    name = safe_get(association, 'edge', 'label')
    if name is None:
        name = safe_get(association, 'predicate')
    if name == 'EquivalentAssociation':
        name = 'same_as'

    if isinstance(name, list):
        name = name[0] if name else blm.DEFAULT_EDGE_LABEL

    # Neither an edge label nor a predicate was supplied: use the default
    # label instead of failing on the replace below.
    if name is None:
        name = blm.DEFAULT_EDGE_LABEL

    return name.replace(' ', '_')


def get_statements(s=None,
                   s_keywords=None,
                   s_categories=None,
                   edge_label=None,
                   relation=None,
                   t=None,
                   t_keywords=None,
                   t_categories=None,
                   offset=None,
                   size=None):
    """Return the statements whose subject is one of the IDs in ``s``.

    Each subject ID containing ``':'`` is crawled. Every returned
    association becomes one statement, except those whose object prefix
    contains ``'name'`` or ``'description'``. The statements are then
    filtered with the predicate built by ``build_filter``, de-duplicated
    with ``remove_duplicates``, and sliced by ``offset`` and ``size``.

    Args:
        s: Iterable of subject CURIEs. Required; the request is aborted
            with HTTP 400 when it is None.
        s_keywords, s_categories, edge_label, relation, t, t_keywords,
        t_categories: Filter criteria, forwarded unchanged to
            ``build_filter``.
        offset: Number of leading statements to drop, or None.
        size: Maximum number of statements to return, or None.

    Returns:
        A list of statements as produced by ``build_statement``.
    """
    if s is None:
        abort(400,
              'Cannot search for statements without providing a subject ID')

    statements = []

    for subject_id in s:
        if ':' not in subject_id:
            continue

        data = crawler.crawl(subject_id)

        if data == {}:
            continue

        subject_name = find_subject_name(data)

        # Per-subject values are derived once from the ID as requested.
        # Rebinding the loop variable to its simplified form would make the
        # prefix depend on how many associations were already processed.
        subject_prefix = subject_id.split(':', 1)[0]
        subject_category = utils.lookup_category(subject_prefix)
        simplified_subject_id = simplify_curie(subject_id)

        for category, associations in data.items():
            if category == 'null':
                category = None

            for a in associations:
                object_id = safe_get(a, 'object', 'id')
                if object_id is not None and ':' in object_id:
                    object_prefix = object_id.split(':', 1)[0].lower()
                    # Name and description objects are attributes of the
                    # subject rather than statements about it.
                    if 'name' in object_prefix or 'description' in object_prefix:
                        continue

                statements.append(
                    build_statement(
                        object_id=simplify_curie(object_id),
                        object_name=_object_display_name(a),
                        object_category=category,
                        subject_id=simplified_subject_id,
                        subject_name=subject_name,
                        subject_category=subject_category,
                        predicate_id=safe_get(a, 'edge', 'id'),
                        predicate_name=_predicate_display_name(a)))

    is_valid = build_filter(s, s_keywords, s_categories, edge_label, relation,
                            t, t_keywords, t_categories)

    statements = [statement for statement in statements
                  if is_valid(statement)]

    statements = remove_duplicates(statements)

    if offset is not None:
        statements = statements[offset:]
    if size is not None:
        statements = statements[:size]

    return statements