def test_custom_categories(self, graphkb_conn):
        """
        Pass an explicit list of (category, base terms) pairs as the third argument.

        With that list, relevance rid 'x' is expected to give no category ('')
        and relevance rid '1' is expected to give the custom 'blargh' category.
        """
        custom_terms = [('blargh', ['some', 'blargh'])]

        unmatched = statement.categorize_relevance(graphkb_conn, 'x', custom_terms)
        assert unmatched == ''

        matched = statement.categorize_relevance(graphkb_conn, '1', custom_terms)
        assert matched == 'blargh'
 def test_predisposition_category(self, graphkb_conn):
     """Relevance rid '8', with no explicit category list, should give 'cancer predisposition'."""
     expected = 'cancer predisposition'
     assert statement.categorize_relevance(graphkb_conn, '8') == expected
 def test_no_match(self, graphkb_conn):
     """Relevance rid 'x', with no explicit category list, should give the empty string."""
     result = statement.categorize_relevance(graphkb_conn, 'x')
     assert result == ''
 def test_third_category(self, graphkb_conn):
     """Relevance rid '4', with no explicit category list, should give 'prognostic'."""
     assert statement.categorize_relevance(graphkb_conn, '4') == 'prognostic'
 def test_fifth_category(self, graphkb_conn):
     """Relevance rid '6', with no explicit category list, should give 'pharmacogenomic'."""
     expected = 'pharmacogenomic'
     observed = statement.categorize_relevance(graphkb_conn, '6')
     assert observed == expected
 def test_second_category(self, graphkb_conn):
     """Relevance rid '3', with no explicit category list, should give 'diagnostic'."""
     assert statement.categorize_relevance(graphkb_conn, '3') == 'diagnostic'
 def test_first_match_returns(self, graphkb_conn):
     """Relevance rid '2', with no explicit category list, should give 'therapeutic'."""
     result = statement.categorize_relevance(graphkb_conn, '2')
     assert result == 'therapeutic'
 def test_default_categories(self, graphkb_conn):
     """Relevance rid '1', with no explicit category list, should give 'therapeutic'."""
     expected = 'therapeutic'
     assert statement.categorize_relevance(graphkb_conn, '1') == expected
示例#9
0
def create_section_html(
    graphkb_conn: GraphKBConnection,
    gene_name: str,
    sentences_by_statement_id: Dict[str, str],
    statements: Dict[str, Statement],
    exp_variants: List[IprVariant],
) -> str:
    """
    Generate HTML for a gene section of the comments

    Args:
        graphkb_conn: the graphkb connection object used for the relevance and gene queries
        gene_name: name of the gene; used as the section heading and to look up the gene record
        sentences_by_statement_id: sentence text keyed by statement ID
        statements: statement records keyed by the same statement IDs
        exp_variants: variants handed to display_variants together with the gene name

    Returns:
        the HTML for the section, or an empty string when display_variants
        produces no text for this gene
    """
    variants_text = display_variants(gene_name, exp_variants)
    if not variants_text:
        # exclude sections where they are not linked to an experimental variant. this can occur
        # when there are co-occurrent statements collected. Checked before anything else so that
        # no GraphKB queries are made for a section that is going to be discarded
        return ''

    output = [f'<h2>{gene_name}</h2>']

    # the same for every statement: the base terms plus an extra 'resistance' category
    category_terms = RELEVANCE_BASE_TERMS + [('resistance', ['no sensitivity'])]
    sentence_categories: Dict[str, str] = {}

    for statement_id, sentence in sentences_by_statement_id.items():
        relevance = statements[statement_id]['relevance']['@rid']
        sentence_categories[sentence] = categorize_relevance(
            graphkb_conn, relevance, category_terms)

    # get the entrez gene description
    genes = sorted(
        graphkb_conn.query(
            {
                'target': 'Feature',
                'filters': {
                    'AND': [
                        {
                            'source': {
                                'target': 'Source',
                                'filters': {
                                    'name': 'entrez gene'
                                }
                            }
                        },
                        {
                            'name': gene_name
                        },
                        {
                            'biotype': 'gene'
                        },
                    ]
                },
            }, ),
        key=generate_ontology_preference_key,
    )

    # NOTE(review): the variants paragraph is only emitted together with the gene description,
    # so it is dropped when the gene record has no description - confirm this is intended
    if genes and genes[0].get('description', ''):
        # keep at most the first two sentences of the description
        description = '. '.join(genes[0]['description'].split('. ')[:2]).rstrip()
        if not description.endswith('.'):
            # short descriptions already carry their final period; do not double it
            description += '.'
        source_id = genes[0]['sourceId']

        output.append(f'''
<blockquote class="entrez_description" cite="{ENTREZ_GENE_URL}/{source_id}">
    {description}
</blockquote>
<p>
    {variants_text}
</p>
''')

    # one paragraph per group, in this order: diagnostic, biological, therapeutic/prognostic,
    # anything uncategorized or in another category, and finally resistance.
    # Each sentence has exactly one category so the groups cannot overlap
    leading_groups = [['diagnostic'], ['biological'], ['therapeutic', 'prognostic']]
    trailing_group = ['resistance']
    named_categories = {
        category
        for group in leading_groups + [trailing_group] for category in group
    }

    categorized = list(sentence_categories.items())
    paragraphs = [[s for (s, c) in categorized if c in group]
                  for group in leading_groups]
    paragraphs.append([s for (s, c) in categorized if c not in named_categories])
    paragraphs.append([s for (s, c) in categorized if c in trailing_group])

    for sentences in paragraphs:
        # an empty group still produces an (empty) paragraph, as before
        content = '. '.join(sorted(sentences))
        output.append(f'<p>{content}</p>')
    return '\n'.join(output)
示例#10
0
def convert_statements_to_alterations(
    graphkb_conn: GraphKBConnection,
    statements: List[Statement],
    disease_name: str,
    variant_matches: Iterable[str],
) -> List[KbMatch]:
    """
    Given a set of statements matched from graphkb, convert these into their IPR equivalent representations

    One row is produced for every (statement, variant condition) pair where the
    variant's RID is in variant_matches.

    Args:
        graphkb_conn: the graphkb connection object
        statements: list of statement records from graphkb
        disease_name: name of the cancer type for the patient being reported on
        variant_matches: the list of RIDs the variant matched for these statements

    Raises:
        ValueError: could not find the disease type in GraphKB

    Returns:
        IPR graphkb row representations

    Notes:
        - only report disease matched prognostic markers https://www.bcgsc.ca/jira/browse/GERO-72 and GERO-196
    """
    disease_matches = {
        r['@rid']
        for r in get_term_tree(
            graphkb_conn, disease_name, ontology_class='Disease')
    }

    if not disease_matches:
        raise ValueError(
            f'failed to match disease ({disease_name}) to graphkb')

    # variant_matches is only promised to be an Iterable: materialize it once so that the
    # membership tests below are constant time and a one-shot iterator is not used up by
    # the first `in` check
    matched_variant_rids = set(variant_matches)

    approved = convert_to_rid_set(get_approved_evidence_levels(graphkb_conn))

    rows = []

    for statement in statements:
        conditions = statement['conditions']
        variants = [c for c in conditions if c['@class'] in VARIANT_CLASSES]
        diseases = [c for c in conditions if c['@class'] == 'Disease']
        pmid = ';'.join([e['displayName'] for e in statement['evidence']])

        relevance = statement['relevance']
        subject = statement['subject']
        source = statement['source']

        # only a statement with exactly one disease condition can count as a disease match
        disease_match = len(
            diseases) == 1 and diseases[0]['@rid'] in disease_matches

        ipr_section = categorize_relevance(graphkb_conn, relevance['@rid'])

        if ipr_section == 'prognostic' and not disease_match:
            continue  # GERO-72 / GERO-196

        # evidence levels are only consulted for therapeutic statements; the field may be empty
        approved_therapy = ipr_section == 'therapeutic' and any(
            level['@rid'] in approved
            for level in (statement['evidenceLevel'] or []))

        for variant in variants:
            if variant['@rid'] not in matched_variant_rids:
                continue
            rows.append(
                KbMatch({
                    'approvedTherapy': approved_therapy,
                    'category': ipr_section or 'unknown',
                    'context': subject['displayName'] if subject else None,
                    'kbContextId': subject['@rid'] if subject else None,
                    'disease': ';'.join(sorted(d['displayName'] for d in diseases)),
                    'evidenceLevel': display_evidence_levels(statement),
                    'kbStatementId': statement['@rid'],
                    'kbVariant': variant['displayName'],
                    'kbVariantId': variant['@rid'],
                    'matchedCancer': disease_match,
                    'reference': pmid,
                    'relevance': relevance['displayName'],
                    'kbRelevanceId': relevance['@rid'],
                    'externalSource': source['displayName'] if source else None,
                    'externalStatementId': statement.get('sourceId'),
                }))
    return rows