Пример #1
0
 def test_movel_specific_matches_general(self, conn):
     """
     A specific variant that is not itself recorded should still be matched
     to the general 'CDKN2A mutation' record.
     """
     variant_repr = 'CDKN2A:p.T18888888888888888888M'
     found = match.match_positional_variant(conn, variant_repr)

     # collect the display names of everything that was matched
     display_names = set()
     for record in found:
         display_names.add(record['displayName'])

     assert found
     # the specific notation itself must not come back as a match
     assert variant_repr not in display_names
     assert 'CDKN2A mutation' in display_names
Пример #2
0
 def test_match_explicit_references(self, conn):
     """
     A fusion given without gene names should match when both references
     are supplied explicitly as record IDs.
     """

     def feature_rid(name):
         # record ID of the first Feature returned for the given name
         return conn.query({'target': 'Feature', 'filters': {'name': name}})[0]['@rid']

     bcr_rid = feature_rid('BCR')
     abl1_rid = feature_rid('ABL1')

     found = match.match_positional_variant(
         conn,
         'fusion(e.13,e.3)',
         reference1=bcr_rid,
         reference2=abl1_rid,
     )
     assert found
Пример #3
0
 def test_known_fusions(self, conn, known_variant, related_variants):
     """
     A known fusion must match itself and every related variant, and none
     of the matched records may be of the GENERAL_MUTATION type.
     """
     found = match.match_positional_variant(conn, known_variant)

     type_names = [record['type']['name'] for record in found]
     assert GENERAL_MUTATION not in type_names

     display_names = {record['displayName'] for record in found}
     assert found
     assert known_variant in display_names
     assert all(variant in display_names for variant in related_variants)
Пример #4
0
 def test_known_variants(self, conn, known_variant, related_variants, unrelated_variants):
     """
     A known variant must match itself and all related variants, and must
     not match any of the unrelated variants.
     """
     found = match.match_positional_variant(conn, known_variant)
     display_names = {record['displayName'] for record in found}

     assert found
     assert known_variant in display_names
     assert all(variant in display_names for variant in related_variants)
     assert all(variant not in display_names for variant in unrelated_variants)
Пример #5
0
from graphkb import GraphKBConnection
from graphkb.constants import BASE_RETURN_PROPERTIES, GENERIC_RETURN_PROPERTIES
from graphkb.match import match_positional_variant
from graphkb.util import convert_to_rid_list
from graphkb.vocab import get_term_tree

# API endpoint and login details (the credential values appear masked here)
GKB_API_URL = 'https://pori-demo.bcgsc.ca/graphkb-api/api'
GKB_USER = '******'
GKB_PASSWORD = '******'

# create the connection object and authenticate before issuing any queries
graphkb_conn = GraphKBConnection(GKB_API_URL, use_global_cache=False)
graphkb_conn.login(GKB_USER, GKB_PASSWORD)

# find the variant records which the variant of interest matches
variant_name = 'KRAS:p.G12D'
variant_matches = match_positional_variant(graphkb_conn, variant_name)

for match in variant_matches:
    print(f"{variant_name} will match {match['displayName']}")

# return properties should be customized to the user's needs
return_props = (
    BASE_RETURN_PROPERTIES
    + ['sourceId', 'source.name', 'source.displayName']
    + [
        # the generic properties are requested for each linked record field
        f'{linked}.{p}'
        for linked in ('conditions', 'subject', 'evidence', 'relevance', 'evidenceLevel')
        for p in GENERIC_RETURN_PROPERTIES
    ]
)

statements = graphkb_conn.query({
    'target': 'Statement',
    'filters': {
Пример #6
0
 def test_genomic_coordinates(self, conn):
     """Smoke test: matching a genomic (g.) notation only has to complete without raising."""
     match.match_positional_variant(conn, 'X:g.100611165A>T')
Пример #7
0
 def test_known_fusion_single_gene_no_match(self, conn):
     """A fusion with an unknown ('?') second gene and exon must produce no matches."""
     single_gene_fusion = '(BCR,?):fusion(e.13,e.?)'
     assert not match.match_positional_variant(conn, single_gene_fusion)
Пример #8
0
 def test_bad_gene2_name(self, conn):
     """A fusion whose second gene name is not a real gene must raise FeatureNotFoundError."""
     fusion_repr = '(BCR,ME-AS-A-GENE):fusion(e.13,e.3)'
     with pytest.raises(FeatureNotFoundError):
         match.match_positional_variant(conn, fusion_repr)
Пример #9
0
 def test_match_explicit_reference1(self, conn):
     """A variant given without its gene should match when reference1 is passed as a record ID."""
     kras_features = conn.query({'target': 'Feature', 'filters': {'name': 'KRAS'}})
     kras_rid = kras_features[0]['@rid']
     found = match.match_positional_variant(conn, 'p.G12D', reference1=kras_rid)
     assert found
     assert matches
Пример #10
0
 def test_uncertain_position_not_supported(self, conn):
     """A fusion with a position range (e.13_24) must raise NotImplementedError."""
     ranged_fusion = '(BCR,ABL1):fusion(e.13_24,e.3)'
     with pytest.raises(NotImplementedError):
         match.match_positional_variant(conn, ranged_fusion)
Пример #11
0
 def test_bad_gene_name(self, conn):
     """A variant on a gene name that is not a real gene must raise FeatureNotFoundError."""
     variant_repr = 'ME-AS-A-GENE:p.G12D'
     with pytest.raises(FeatureNotFoundError):
         match.match_positional_variant(conn, variant_repr)
Пример #12
0
 def test_error_on_duplicate_reference2(self, conn):
     """
     Giving reference2 as a keyword when the variant string already names
     both genes must raise ValueError.
     """
     fusion_repr = '(BCR,ABL1):fusion(e.13,e.3)'
     with pytest.raises(ValueError):
         match.match_positional_variant(conn, fusion_repr, reference2='#123:34')
Пример #13
0
 def test_error_on_bad_reference2(self, conn):
     """Giving reference2 together with a single-gene variant string must raise ValueError."""
     variant_repr = 'KRAS:p.G12D'
     with pytest.raises(ValueError):
         match.match_positional_variant(
             conn,
             variant_repr,
             reference2='#123:34',
         )
Пример #14
0
 def test_error_on_duplicate_reference1(self, conn):
     """
     Passing an extra positional record ID when the variant string already
     names its gene must raise ValueError.
     """
     variant_repr = 'KRAS:p.G12D'
     extra_reference = '#123:34'
     with pytest.raises(ValueError):
         match.match_positional_variant(conn, variant_repr, extra_reference)
Пример #15
0
def annotate_variant(graphkb_conn: GraphKBConnection,
                     raw_variant_name: str,
                     include_unmatched: bool = False) -> List[Dict[str, str]]:
    """
    Match a single variant against GraphKB and flatten the statements linked
    to the matched variant records into one row (dict) per statement.

    Args:
        graphkb_conn: the graphkb api connection object
        raw_variant_name: the variant notation as supplied by the caller; it
            is passed through convert_aa_3to1 before matching and is reported
            unchanged in the 'variant' field of every row
        include_unmatched: when True, a row is also emitted for a variant
            whose feature was not found, or which has no related statements

    Returns:
        list of rows. A row holding an 'error' key is returned when the
        notation is skipped as unsupported (contains 'c.*') or when matching
        raised an unexpected exception. The list is empty when nothing was
        found and include_unmatched is False.

    Note:
        Reads the name ``therapeutic_terms``, which is not defined in this
        function and must exist in the enclosing scope (presumably a
        collection of relevance record IDs -- confirm against the caller).
    """
    results = []
    variant_name = convert_aa_3to1(raw_variant_name)

    if 'c.*' in variant_name:
        results.append({
            'variant': raw_variant_name,
            'error': f'skipping unsupported notation: {variant_name}',
        })
        return results

    print(f'processing: {variant_name}')

    try:
        variant_matches = match_positional_variant(graphkb_conn, variant_name)
    except FeatureNotFoundError:
        if include_unmatched:
            results.append({'variant': raw_variant_name})
        return results
    except Exception as err:
        # deliberate best-effort: one bad variant is reported as an error row
        # instead of aborting the caller
        results.append({'variant': raw_variant_name, 'error': str(err)})
        return results

    statements = []
    # only query for statements when there is something to look up; an empty
    # list of conditions cannot select any statement
    if variant_matches:
        print(f'{variant_name} matches {len(variant_matches)} variant records')
        # return properties should be customized to the users needs
        return_props = (BASE_RETURN_PROPERTIES +
                        ['sourceId', 'source.name', 'source.displayName'] +
                        [f'conditions.{p}' for p in GENERIC_RETURN_PROPERTIES] +
                        [f'subject.{p}' for p in GENERIC_RETURN_PROPERTIES] +
                        [f'evidence.{p}' for p in GENERIC_RETURN_PROPERTIES] +
                        [f'relevance.{p}' for p in GENERIC_RETURN_PROPERTIES] +
                        [f'evidenceLevel.{p}'
                         for p in GENERIC_RETURN_PROPERTIES] + ['reviewStatus'])

        statements = typing.cast(
            List[Statement],
            graphkb_conn.query({
                'target': 'Statement',
                'filters': {
                    'conditions': convert_to_rid_list(variant_matches),
                    'operator': 'CONTAINSANY',
                },
                'returnProperties': return_props,
            }),
        )

    # identical for every row of this variant, so build it once
    matched_names = ';'.join(sorted(v['displayName'] for v in variant_matches))

    if not statements:
        if include_unmatched:
            results.append({
                'variant_matches': matched_names,
                'variant': raw_variant_name,
            })
        return results
    print(f'{variant_name} matches {len(statements)} statements')

    for statement in statements:
        relevance = statement['relevance']
        row = {
            'variant_matches':
            matched_names,
            'variant':
            raw_variant_name,
            'statement.relevance':
            relevance['displayName'],
            'statement.@rid':
            statement['@rid'],
            'statement.subject':
            statement['subject']['displayName'],
            # the source may be empty on a statement
            'statement.source':
            statement['source']['displayName'] if statement['source'] else '',
            'statement.evidence':
            ';'.join(sorted(e['displayName'] for e in statement['evidence'])),
            'statement.conditions':
            ';'.join(sorted(e['displayName'] for e in statement['conditions'])),
            # the evidence level may be empty on a statement
            'statement.evidence_level':
            ';'.join(
                sorted(e['displayName']
                       for e in (statement['evidenceLevel'] or []))),
            'statement.review_status':
            statement['reviewStatus'],
            'is_therapeutic':
            relevance['@rid'] in therapeutic_terms,
        }
        results.append(row)
    return results
Пример #16
0
def annotate_positional_variants(
    graphkb_conn: GraphKBConnection,
    variants: List[IprVariant],
    disease_name: str,
    show_progress: bool = False,
) -> List[KbMatch]:
    """
    Annotate variant calls with information from GraphKB and return these annotations in the IPR
    alterations format

    Variants which cannot be matched because a gene was not found, or because the request failed
    with an HTTPError, are skipped and summarized in the log rather than aborting the run.

    Args:
        graphkb_conn (GraphKBConnection): the graphkb api connection object
        variants (list.<dict>): list of variants. Each row is read for its 'variant', 'key' and
            'variantType' fields and for 'gene' or 'gene1'/'gene2'
        disease_name (str): passed through to get_ipr_statements_from_variants
        show_progress (bool): wrap the iteration over variants in progressbar. Defaults to False.

    Returns:
        list of kbMatches records for IPR
    """
    errors = 0
    alterations = []
    problem_genes = set()

    iterfunc = progressbar if show_progress else iter
    for row in iterfunc(variants):
        variant = row['variant']

        if not row.get('gene', '') and (not row.get('gene1', '') or not row.get('gene2', '')):
            # https://www.bcgsc.ca/jira/browse/GERO-56?focusedCommentId=1234791&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-1234791
            # should not match single gene SVs
            continue

        try:
            matches = match_positional_variant(graphkb_conn, variant)

            for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_name):
                new_row = KbMatch({'variant': row['key'], 'variantType': row['variantType']})
                new_row.update(ipr_row)
                alterations.append(new_row)

        except FeatureNotFoundError as err:
            logger.debug(f'failed to match positional variants ({variant}): {err}')
            errors += 1
            err_text = str(err)
            # Test the gene fields by value rather than by key presence: a row may carry an
            # empty 'gene' alongside valid gene1/gene2, and recording that empty value would
            # hide the real genes (a None would also break the sorted() call below).
            gene = row.get('gene')
            gene1 = row.get('gene1')
            gene2 = row.get('gene2')
            if gene:
                problem_genes.add(gene)
            elif gene1 and f"({gene1})" in err_text:
                problem_genes.add(gene1)
            elif gene2 and f"({gene2})" in err_text:
                problem_genes.add(gene2)
            elif gene1 and gene2:
                # the error message does not say which gene failed, so report both
                problem_genes.update((gene1, gene2))
            else:
                raise
        except HTTPError as err:
            errors += 1
            logger.error(f'failed to match positional variants ({variant}): {err}')

    if problem_genes:
        logger.error(f'gene finding failures for {sorted(problem_genes)}')
        logger.error(f'{len(problem_genes)} gene finding failures for positional variants')
    if errors:
        logger.error(f'skipped {errors} positional variants due to errors')
    logger.info(
        f'matched {len(variants)} positional variants to {len(alterations)} graphkb annotations'
    )

    return alterations