Пример #1
0
 def test_get_most_severe(self):
     ''' get_most_severe returns the expected term for known consequence lists
     '''
     
     # each entry pairs a list of consequences with the term we expect back
     expectations = [
         (['missense_variant', 'protein_altering_variant',
             'splice_region_variant', 'incomplete_terminal_codon_variant'],
             'missense_variant'),
         (['stop_lost', 'start_lost', 'transcript_amplification',
             'conserved_exon_terminus_variant'],
             'stop_lost'),
     ]
     for consequences, expected in expectations:
         self.assertEqual(get_most_severe(consequences), expected)
     
     # with no consequences to choose from, an IndexError is raised
     self.assertRaises(IndexError, get_most_severe, [])
Пример #2
0
    def test_get_most_severe(self):
        ''' get_most_severe picks the expected term from each consequence list
        '''

        missense_group = [
            'missense_variant', 'protein_altering_variant',
            'splice_region_variant', 'incomplete_terminal_codon_variant'
        ]
        stop_lost_group = [
            'stop_lost', 'start_lost', 'transcript_amplification',
            'conserved_exon_terminus_variant'
        ]

        # checked in order: the first list must give 'missense_variant', the
        # second 'stop_lost'
        for terms, expected in (
                (missense_group, 'missense_variant'),
                (stop_lost_group, 'stop_lost')):
            self.assertEqual(get_most_severe(terms), expected)

        # passing no consequences at all is an IndexError
        self.assertRaises(IndexError, get_most_severe, [])
Пример #3
0
def person_recurrence(de_novos):
    """ identify de novos recurrent in a gene within individuals.

    Find the de novos that are recurrent within a single individual in a
    single gene. We shall treat these as a single de novo event. Within each
    person/gene pair, the variant kept is the first row whose consequence
    matches the one picked by get_most_severe(); every other row of that pair
    is flagged as a duplicate.

    Args:
        de_novos: dataframe of de novo variants, with "person_stable_id",
            "symbol" and "consequence" columns.

    Returns:
        boolean pandas Series, indexed like de_novos, which is True for rows
        to be treated as duplicates, and False for rows which are either not
        recurrent, or are the row selected to represent a recurrent
        person/gene pair.
    """

    group_columns = ["person_stable_id", "symbol"]

    # find the variants which are recurrent within a person in a single gene.
    # keep=False flags every member of a repeated person/gene pair, which is
    # the same as OR-ing the keep='first' and keep='last' results.
    person_dups = de_novos.duplicated(group_columns, keep=False)
    in_person_dups = de_novos[person_dups]

    # pick a variant for each person, the first of the most severe consequence.
    # The per-gene results are collected in a list and joined once afterwards,
    # since Series.append() no longer exists in pandas 2.0 and later.
    per_gene = []
    for _, gene in in_person_dups.groupby(group_columns):
        consequence = get_most_severe(gene["consequence"])
        first = gene[gene["consequence"] == consequence].index[0]

        gene_retain = pandas.Series(True, index=gene.index)
        gene_retain.loc[first] = False
        per_gene.append(gene_retain)

    # set the selected de novos (nothing to change if no gene was recurrent)
    if per_gene:
        retain = pandas.concat(per_gene)
        person_dups.loc[retain.index] = retain

    return person_dups
def person_recurrence(de_novos):
    """ identify de novos recurrent in a gene within individuals.
    
    Find the de novos that are recurrent within a single individual in a
    single gene. We shall treat these as a single de novo event. Within each
    person/gene pair, the variant kept is the first row whose consequence
    matches the one picked by get_most_severe(); every other row of that pair
    is flagged as a duplicate.
    
    Args:
        de_novos: dataframe of de novo variants, with "person_stable_id",
            "symbol" and "consequence" columns.
    
    Returns:
        boolean pandas Series, indexed like de_novos, which is True for rows
        to be treated as duplicates, and False for rows which are either not
        recurrent, or are the row selected to represent a recurrent
        person/gene pair.
    """
    
    group_columns = ["person_stable_id", "symbol"]
    
    # find the variants which are recurrent within a person in a single gene.
    # The old take_last keyword has been removed from pandas in favour of
    # keep; keep=False flags every member of a repeated person/gene pair,
    # matching the previous "from start OR from end" result.
    person_dups = de_novos.duplicated(group_columns, keep=False)
    in_person_dups = de_novos[person_dups]
    
    # split the dataset, so we can process gene by gene
    genes = in_person_dups.groupby(group_columns)
    
    # pick a variant for each person, the first of the most severe consequence.
    # The per-gene results are gathered in a list and joined once afterwards,
    # since Series.append() no longer exists in pandas 2.0 and later.
    per_gene = []
    for _, gene in genes:
        consequence = get_most_severe(gene["consequence"])
        first = gene[gene["consequence"] == consequence].index[0]
        
        gene_retain = pandas.Series(True, index=gene.index)
        gene_retain.loc[first] = False
        per_gene.append(gene_retain)
    
    # set the selected de novos (nothing to change if no gene was recurrent)
    if per_gene:
        retain = pandas.concat(per_gene)
        person_dups.loc[retain.index] = retain
    
    return person_dups