def analyse_gene(ensembl, mut_dict, cadd, symbol, de_novos, constraint, weights,
        iterations=1000000):
    ''' analyse the severity of de novos found in a gene
    
    Args:
        ensembl: EnsemblRequest object, for transcript coordinates and sequence
        mut_dict: list of sequence-context mutation probabilities.
        cadd: pysam.TabixFile object for CADD scores (SNVs only)
        symbol: HGNC symbol for current gene
        de_novos: list of de novo mutations observed in current gene. Each
            entry is a dict. This function itself only reads the 'pos' key
            (the chromosomal position); the whole list is also handed to
            get_severity(), which presumably uses further keys such as 'ref',
            'alt' and 'consequence' - confirm against get_severity().
        constraint: constraint data for the gene, forwarded unchanged to
            get_constrained_positions() together with the ensembl object and
            gene symbol. NOTE(review): the exact type is not visible here.
        weights: dictionary of objects to weight CADD severity scores. We have
            different weights for protein-truncating and protein-altering
            variants, and within the protein-altering variants, different
            weights for variants in constrained and unconstrained regions.
        iterations: value forwarded as the final argument to analyse(), which
            going by the surrounding comments is the number of simulations
            used to build the null distribution. Defaults to 1000000, the
            value that was previously hard-coded.
    
    Returns:
        p-value for the observed total severity with respect to a null
        distribution of severities for the gene, or the string 'NA' if
        load_gene() raises an IndexError for this gene.
    '''
    
    sites = [x['pos'] for x in de_novos]
    
    try:
        # create gene/transcript for de novo mutations
        transcripts = load_gene(ensembl, symbol, sites)
    except IndexError:
        # no usable transcript could be built for the gene, so there is
        # nothing to test; callers get a placeholder rather than a p-value
        return 'NA'
    
    # get per site/allele mutation rates
    rates_by_cq = get_site_sampler(transcripts, mut_dict)
    
    # the chromosome is taken from the first transcript only
    chrom = transcripts[0].get_chrom()
    
    # get per site/allele severity scores, weighted by enrichment of missense
    # in known dominant at different severity thresholds
    constrained = get_constrained_positions(ensembl, constraint, symbol)
    severity = get_severity(cadd, chrom, rates_by_cq, weights, constrained)
    
    # convert the rates per site per consequence to rates per site. The
    # consequences are visited in sorted order so the combined sampler is
    # assembled in the same order on every run.
    rates = WeightedChoice()
    for cq in sorted(rates_by_cq):
        rates.append(rates_by_cq[cq])
    
    # get summed score for observed de novos
    observed = sum(get_severity(cadd, chrom, de_novos, weights, constrained))
    
    # simulate distribution of summed scores within transcript
    return analyse(rates, severity, observed, len(de_novos), iterations)
def test_append(self):
    """ check append() updates the receiver and leaves the argument alone
    """
    
    # the sampler that will receive the other one
    receiver = WeightedChoice()
    receiver.add_choice(1, 0.5)
    
    # the sampler that gets appended
    appended = WeightedChoice()
    appended.add_choice(2, 1)
    
    receiver.append(appended)
    
    # the receiver's summed rate should now include the appended sampler's
    # rate (0.5 + 1), while the appended sampler keeps its own summed rate
    expectations = ((receiver, 1.5), (appended, 1.0))
    for sampler, expected_rate in expectations:
        self.assertEqual(sampler.get_summed_rate(), expected_rate)