def mock_proteins_and_genes(count):
    """Add `count` mock genes, each with one protein as preferred isoform, to the session.

    Genes are named ``Gene_<i>`` and their proteins get the refseq ``NM_000<i>``,
    for ``i`` in ``range(count)``. Nothing is committed or returned here.
    """
    # imported locally, as in the original helper
    from database import db
    from models import Gene, Protein

    for index in range(count):
        gene = Gene(
            name='Gene_%s' % index,
            full_name='Full name of gene %s' % index
        )
        isoform = Protein(refseq='NM_000%s' % index, gene=gene)
        gene.preferred_isoform = isoform
        db.session.add(gene)
def test_autocomplete_all(self):
    """Exercise the `/search/autocomplete_all/` endpoint with several kinds of queries.

    Covered in order: gene and amino acid mutation queries, genomic (nucleotide)
    mutation queries including the complementary strand fallback, pathways
    (text, Reactome id and Gene Ontology id), diseases, and finally
    "disease in protein" suggestions.

    Every response is additionally passed to `self.visit_returned_urls`.
    """
    # MC3 GeneList is required as a target (a href for links) where users will be pointed
    # after clicking of cancer autocomplete suggestion. Likewise with the ClinVar list.
    db.session.add_all([
        GeneList(name=name, mutation_source_name=detail_class.name)
        for name, detail_class in [
            ('TCGA', MC3Mutation),
            ('ClinVar', InheritedMutation)
        ]
    ])

    g = Gene(name='BR')
    p = Protein(id=1, refseq='NM_007', gene=g, sequence='XXXXXV')
    # required for gene search to work - genes without preferred isoforms are ignored
    g.preferred_isoform = p
    mut = Mutation(protein=p, position=6, alt='E')
    db.session.add_all([mut, p, g])

    def autocomplete(query):
        """Query the autocomplete endpoint and visit the URLs from the response."""
        r = self.client.get('/search/autocomplete_all/?q=' + query)
        self.visit_returned_urls(r)
        return r

    from database import bdb_refseq, bdb
    bdb_refseq['BR V6E'] = [p.id]  # required for mutation search
    bdb.add_genomic_mut('1', 10000, 'T', 'C', mut)

    # Gene and mutations
    response = autocomplete('BR V6E')
    entry = get_entry_and_check_type(response, 'aminoacid mutation')
    assert entry

    response = autocomplete('BR V6')
    entry = get_entry_and_check_type(response, 'message')
    assert 'Awaiting for <code>{alt}</code>' in entry['name']

    response = autocomplete('BR V')
    entry = get_entry_and_check_type(response, 'message')
    assert 'Awaiting for <code>{pos}{alt}</code>' in entry['name']

    response = autocomplete('B')
    entry = get_entry_and_check_type(response, 'gene')
    assert 'BR' == entry['name']

    # genomic mutation
    response = autocomplete('chr1 10000 T C')
    entry = get_entry_and_check_type(response, 'nucleotide mutation')
    assert entry and entry['input'] == 'CHR1 10000 T C'

    # is the search falling back to the other strand?
    response = autocomplete('chr1 10000 A G')
    entry = get_entry_and_check_type(response, 'nucleotide mutation')
    assert entry and entry['input'] == 'complement of CHR1 10000 A G'

    prompt = 'Awaiting for mutation in <code>{chrom} {pos} {ref} {alt}</code> format'

    for prompt_invoking_query in ['chr1', 'chr1 ', 'chr1 40', 'chr1 40 ', 'chr1 40 T']:
        response = autocomplete(prompt_invoking_query)
        entry = get_entry_and_check_type(response, 'message')
        assert entry['name'] == prompt

    # Pathways
    pathways = [
        Pathway(description='Activation of RAS in B cells', reactome=1169092),
        Pathway(description='abortive mitotic cell cycle', gene_ontology=33277),
        Pathway(description='amacrine cell differentiation', gene_ontology=35881),
        Pathway(description='amniotic stem cell differentiation', gene_ontology=97086)
    ]
    db.session.add_all(pathways)

    # test partial matching and Reactome id pathways search
    for ras_activation_query in ['Activation', 'REAC:1', 'REAC:1169092']:
        response = autocomplete(ras_activation_query)
        entry = get_entry_and_check_type(response, 'pathway')
        assert entry['name'].startswith('Activation of RAS in B cells')

    # test Gene Ontology search:
    response = autocomplete('GO:33')
    go_pathway = get_entry_and_check_type(response, 'pathway')
    assert go_pathway['name'] == 'abortive mitotic cell cycle (GO:33277)'

    # check if multiple pathways are returned
    response = autocomplete('differentiation')
    assert len(response.json['entries']) == 2

    # check if both genes and pathways are returned simultaneously
    # there should be: a pathway ('a>b<ortive...') and the >B<R gene
    response = autocomplete('b')
    entries = response.json['entries']
    names = [entry['name'] for entry in entries]
    assert all(name in names for name in ['BR', 'abortive mitotic cell cycle'])

    # check if "search more pathways" is displayed
    # cell occurs in all four of added pathways;
    # as a limit of pathways shown is 3, we should get a "show more" link
    response = autocomplete('cell')
    links = entries_with_type(response, 'see_more')
    assert len(links) == 1
    assert links[0]['name'] == 'Show all pathways matching <i>cell</i>'

    # test case insensitive text search
    response = autocomplete('AMNIOTIC STEM')
    pathways = entries_with_type(response, 'pathway')
    assert len(pathways) == 1
    assert pathways[0]['name'] == 'amniotic stem cell differentiation'

    # Disease
    disease_names = [
        'Cystic fibrosis', 'Polycystic kidney disease 2', 'Frontotemporal dementia',
        'Cataract, nuclear total'
    ]
    diseases = {name: Disease(name=name) for name in disease_names}
    db.session.add_all(diseases.values())

    response = autocomplete('cystic')
    cystic_matching = entries_with_type(response, 'disease')
    # both 'Cystic fibrosis' and PKD2 should match
    assert len(cystic_matching) == 2

    # is comma containing disease name properly linked?
    response = autocomplete('Cataract')
    cataract = get_entry_and_check_type(response, 'disease')
    assert cataract['name'] == 'Cataract, nuclear total'

    # Gene mutation in disease

    # test suggestions
    response = autocomplete('cystic ')
    entry = entries_with_type(response, 'message')[0]
    # raw string: '\?' is not a valid escape sequence in a regular string literal
    assert re.match(r'Do you wish to search for (.*?) mutations\?', entry['name'])

    # currently there are no mutations associated with any disease
    # so the auto-completion should not return any results
    response = autocomplete('cystic in ')
    assert not response.json['entries']

    # let's add a mutation
    m = Mutation(protein=p, position=1, alt='Y')
    # NOTE(review): the 'BR V6E' entry above stores [p.id] while this one stores
    # the refseq string - confirm which form bdb_refseq is expected to hold
    bdb_refseq['BR X1Y'] = ['NM_007']

    # note: sig_code is required here
    data = ClinicalData(disease=diseases['Cystic fibrosis'], sig_code=1)
    disease_mutation = InheritedMutation(mutation=m, clin_data=[data])
    db.session.add_all([m, data, disease_mutation])

    # should return '.. in BR' suggestion now.
    for query in ['cystic in', 'cystic in ']:
        response = autocomplete(query)
        result = get_entry_and_check_type(response, 'disease_in_protein')
        assert result['gene'] == 'BR'
        assert result['name'] == 'Cystic fibrosis'

    # both gene search and refseq search should yield the same, non-empty results
    results = []
    for query in ['cystic in BR', 'cystic in NM_007', 'cystic in 007']:
        response = autocomplete(query)
        result = get_entry_and_check_type(response, 'disease_in_protein')
        results.append(result)

    # every result has to equal the last one, which itself has to be truthy
    assert all(r == result for r in results) and result
def test_mutated_sites(self):
    """Check what `most_mutated_sites` returns for single sources, both sources and a site type.

    Three sites of a single protein are set up with ClinVar (`InheritedMutation`)
    and TCGA (`MC3Mutation`) mutations; the expected counts are noted next to
    each site below.
    """
    gene = Gene(name='Gene X')
    protein = Protein(refseq='NM_007', sequence='ABCDEFGHIJKLMNOPQRSTUVWXYZ', gene=gene)
    gene.preferred_isoform = protein

    glycosylation = SiteType(name='glycosylation')

    # ClinVar muts and TCGA muts but different, with total count = 5 (3 + 2)
    site_a = Site(position=1, residue='A', protein=protein)
    # ClinVar muts intersection TCGA muts, total count = 4 (2 + 2)
    site_k = Site(position=11, residue='K', protein=protein)
    # Only TCGA muts, total count = 3 (1 + 2)
    site_u = Site(position=21, residue='U', protein=protein, types={glycosylation})

    sites = {'A': site_a, 'K': site_k, 'U': site_u}

    def mutation_at(position):
        """Create a mutation to 'X' at the given position of the test protein."""
        return Mutation(position=position, alt='X', protein=protein)

    def clinical_records(how_many):
        """Create a list with the requested number of empty ClinicalData objects."""
        return [ClinicalData() for _ in range(how_many)]

    # shared by a ClinVar and a TCGA record of the second site
    shared_mutation = mutation_at(11)

    # the first site (1 A)
    first_site_records = [
        InheritedMutation(mutation=mutation_at(1), clin_data=clinical_records(3)),
        MC3Mutation(mutation=mutation_at(2), count=2),
    ]
    # the second site (11 K)
    second_site_records = [
        InheritedMutation(mutation=shared_mutation, clin_data=clinical_records(2)),
        MC3Mutation(mutation=shared_mutation, count=2),
    ]
    # the third site (21 U)
    third_site_records = [
        MC3Mutation(mutation=mutation_at(20), count=1),
        MC3Mutation(mutation=mutation_at(22), count=2),
    ]

    session = db.session
    session.add_all(first_site_records + second_site_records + third_site_records)
    session.add_all([protein, gene])
    session.add_all(sites.values())
    session.commit()

    # a single source: ClinVar
    clinvar_only = most_mutated_sites([InheritedMutation]).all()
    assert clinvar_only == [(site_a, 3), (site_k, 2)]

    # a single source: TCGA; compared as a set
    mc3_only = most_mutated_sites([MC3Mutation]).all()
    assert set(mc3_only) == {(site_a, 2), (site_k, 2), (site_u, 3)}

    both_sources = [MC3Mutation, InheritedMutation]

    in_intersection = most_mutated_sites(both_sources, intersection=True).all()
    assert in_intersection == [(site_k, 4)]

    without_intersection = most_mutated_sites(both_sources, intersection=False).all()
    assert without_intersection == [(site_a, 5), (site_k, 4)]

    # restricting the query to a site type
    glyco_with_mc3 = most_mutated_sites([MC3Mutation], site_type=glycosylation).all()
    assert glyco_with_mc3 == [(site_u, 3)]