def test_custom_categories(self, graphkb_conn):
    """
    Check categorize_relevance when category terms are passed as the third argument

    With a single custom ('blargh', ['some', 'blargh']) category, relevance 'x' is
    expected to yield an empty string and relevance '1' is expected to yield 'blargh'.
    """

    def custom_terms():
        # build a fresh list for every call, exactly one per lookup
        return [('blargh', ['some', 'blargh'])]

    unmatched = statement.categorize_relevance(graphkb_conn, 'x', custom_terms())
    assert unmatched == ''

    matched = statement.categorize_relevance(graphkb_conn, '1', custom_terms())
    assert matched == 'blargh'
def test_predisposition_category(self, graphkb_conn):
    """Relevance '8' is expected to be categorized as 'cancer predisposition'."""
    expected = 'cancer predisposition'
    observed = statement.categorize_relevance(graphkb_conn, '8')
    assert observed == expected
def test_no_match(self, graphkb_conn):
    """Relevance 'x' is expected to produce an empty category string."""
    result = statement.categorize_relevance(graphkb_conn, 'x')
    assert result == ''
def test_third_category(self, graphkb_conn):
    """Relevance '4' is expected to be categorized as 'prognostic'."""
    expected = 'prognostic'
    observed = statement.categorize_relevance(graphkb_conn, '4')
    assert observed == expected
def test_fifth_category(self, graphkb_conn):
    """Relevance '6' is expected to be categorized as 'pharmacogenomic'."""
    result = statement.categorize_relevance(graphkb_conn, '6')
    assert result == 'pharmacogenomic'
def test_second_category(self, graphkb_conn):
    """Relevance '3' is expected to be categorized as 'diagnostic'."""
    expected = 'diagnostic'
    observed = statement.categorize_relevance(graphkb_conn, '3')
    assert observed == expected
def test_first_match_returns(self, graphkb_conn):
    """
    Relevance '2' is expected to be categorized as 'therapeutic'

    NOTE(review): going by the test name, '2' presumably matches more than one category
    in the graphkb_conn fixture and the first one wins -- confirm against the fixture.
    """
    result = statement.categorize_relevance(graphkb_conn, '2')
    assert result == 'therapeutic'
def test_default_categories(self, graphkb_conn):
    """With no category terms passed in, relevance '1' is expected to be 'therapeutic'."""
    expected = 'therapeutic'
    observed = statement.categorize_relevance(graphkb_conn, '1')
    assert observed == expected
def create_section_html(
    graphkb_conn: GraphKBConnection,
    gene_name: str,
    sentences_by_statement_id: Dict[str, str],
    statements: Dict[str, Statement],
    exp_variants: List[IprVariant],
) -> str:
    """
    Generate HTML for a gene section of the comments

    Args:
        graphkb_conn: the graphkb connection object
        gene_name: name of the gene; used for the section heading and the entrez gene lookup
        sentences_by_statement_id: sentence text keyed by the ID of the statement it describes
        statements: statement records keyed by statement ID
        exp_variants: the variants handed to display_variants to build the variant text

    Returns:
        the HTML for the section, or an empty string when display_variants produces no
        text for this gene
    """
    # exclude sections where they are not linked to an experimental variant. this can occur
    # when there are co-occurent statements collected. This is checked before anything else
    # so that no GraphKB queries are made for a section which is going to be discarded
    variants_text = display_variants(gene_name, exp_variants)
    if not variants_text:
        return ''

    output = [f'<h2>{gene_name}</h2>']

    # the category terms do not change between statements, build them once
    category_terms = RELEVANCE_BASE_TERMS + [('resistance', ['no sensitivity'])]
    sentence_categories: Dict[str, str] = {}
    for statement_id, sentence in sentences_by_statement_id.items():
        relevance = statements[statement_id]['relevance']['@rid']
        sentence_categories[sentence] = categorize_relevance(
            graphkb_conn, relevance, category_terms
        )

    # get the entrez gene description
    genes = sorted(
        graphkb_conn.query(
            {
                'target': 'Feature',
                'filters': {
                    'AND': [
                        {'source': {'target': 'Source', 'filters': {'name': 'entrez gene'}}},
                        {'name': gene_name},
                        {'biotype': 'gene'},
                    ]
                },
            },
        ),
        key=generate_ontology_preference_key,
    )

    if genes and genes[0].get('description', ''):
        # keep the first two sentences only. Any trailing period is dropped here because the
        # template below adds its own; without this a description of two or fewer sentences
        # would be rendered ending in '..'
        description = '. '.join(genes[0]['description'].split('. ')[:2]).rstrip('.')
        source_id = genes[0]['sourceId']

        output.append(
            f'''
<blockquote class="entrez_description" cite="{ENTREZ_GENE_URL}/{source_id}">
    {description}.
</blockquote>
<p>
    {variants_text}
</p>
'''
        )

    # paragraphs are written in this fixed order; anything without one of the named
    # categories goes in the fourth paragraph, ahead of the resistance sentences
    named_categories = ['diagnostic', 'biological', 'therapeutic', 'prognostic', 'resistance']
    sections = [
        {s for (s, v) in sentence_categories.items() if v == 'diagnostic'},
        {s for (s, v) in sentence_categories.items() if v == 'biological'},
        {s for (s, v) in sentence_categories.items() if v in ['therapeutic', 'prognostic']},
        {s for (s, v) in sentence_categories.items() if v not in named_categories},
        {s for (s, v) in sentence_categories.items() if v == 'resistance'},
    ]

    sentences_used: Set[str] = set()
    for section in sections:
        # a paragraph is emitted for every section, including empty ones
        content = '. '.join(sorted(section - sentences_used))
        sentences_used.update(section)
        output.append(f'<p>{content}</p>')
    return '\n'.join(output)
def convert_statements_to_alterations(
    graphkb_conn: GraphKBConnection,
    statements: List[Statement],
    disease_name: str,
    variant_matches: Iterable[str],
) -> List[KbMatch]:
    """
    Given a set of statements matched from graphkb, convert these into their IPR equivalent representations

    Args:
        graphkb_conn: the graphkb connection object
        statements: list of statement records from graphkb
        disease_name: name of the cancer type for the patient being reported on
        variant_matches: the list of RIDs the variant matched for these statements

    Raises:
        ValueError: could not find the disease type in GraphKB

    Returns:
        IPR graphkb row representations, one per (statement, matched variant) pair

    Notes:
        - only report disease matched prognostic markers https://www.bcgsc.ca/jira/browse/GERO-72 and GERO-196
    """
    disease_matches = {
        r['@rid'] for r in get_term_tree(graphkb_conn, disease_name, ontology_class='Disease')
    }

    if not disease_matches:
        raise ValueError(f'failed to match disease ({disease_name}) to graphkb')

    # variant_matches is only promised to be an Iterable and is tested for membership once
    # per variant below. Materialize it a single time so a one-shot iterator is not
    # exhausted by the first test and so each test is a constant-time lookup
    matched_variant_ids = set(variant_matches)

    rows: List[KbMatch] = []
    approved = convert_to_rid_set(get_approved_evidence_levels(graphkb_conn))

    for statement in statements:
        variants = [c for c in statement['conditions'] if c['@class'] in VARIANT_CLASSES]
        diseases = [c for c in statement['conditions'] if c['@class'] == 'Disease']
        pmid = ';'.join([e['displayName'] for e in statement['evidence']])

        relevance_id = statement['relevance']['@rid']

        # the statement only counts as disease matched when its single disease condition is
        # one of the terms matched for the patient's disease
        disease_match = len(diseases) == 1 and diseases[0]['@rid'] in disease_matches

        ipr_section = categorize_relevance(graphkb_conn, relevance_id)

        # only therapeutic statements can be flagged as an approved therapy
        approved_therapy = ipr_section == 'therapeutic' and any(
            level['@rid'] in approved for level in statement['evidenceLevel'] or []
        )

        if ipr_section == 'prognostic' and not disease_match:
            continue  # GERO-72 / GERO-196

        subject = statement['subject']
        source = statement['source']

        for variant in variants:
            if variant['@rid'] not in matched_variant_ids:
                continue
            row = KbMatch(
                {
                    'approvedTherapy': approved_therapy,
                    'category': ipr_section or 'unknown',
                    'context': subject['displayName'] if subject else None,
                    'kbContextId': subject['@rid'] if subject else None,
                    'disease': ';'.join(sorted(d['displayName'] for d in diseases)),
                    'evidenceLevel': display_evidence_levels(statement),
                    'kbStatementId': statement['@rid'],
                    'kbVariant': variant['displayName'],
                    'kbVariantId': variant['@rid'],
                    'matchedCancer': disease_match,
                    'reference': pmid,
                    'relevance': statement['relevance']['displayName'],
                    'kbRelevanceId': statement['relevance']['@rid'],
                    'externalSource': source['displayName'] if source else None,
                    'externalStatementId': statement.get('sourceId'),
                }
            )
            rows.append(row)
    return rows