def test_open_mutations_without_header(self):
        ''' check that a file lacking the expected header row is rejected
        '''

        # a single data row, with no header line above it
        rows = [['TEST', '1', '200', 'A', 'C', 'missense_variant']]
        handle = self.write_temp(rows)

        # loading must raise a ValueError rather than guess the columns
        self.assertRaises(ValueError, open_mutations, handle.name)
    def test_open_mutations_consequence_noninteger_positions(self):
        ''' check that a non-integer nucleotide position is rejected
        '''

        header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
        # the position is written as a float string instead of an integer
        record = ['TEST', '1', '200.0', 'A', 'C', 'missense_variant']
        handle = self.write_temp([header, record])

        self.assertRaises(ValueError, open_mutations, handle.name)
    def test_open_mutations_multiple_genes(self):
        ''' check that mutations are grouped under their own gene symbol
        '''

        header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
        first = ['TEST', '1', '200', 'A', 'C', 'missense_variant']
        second = ['TEST2', '1', '300', 'A', 'C', 'missense_variant']
        handle = self.write_temp([header, first, second])

        # one entry per gene symbol, each holding a list of variant records,
        # with the position converted to an integer
        expected = {
            'TEST': [dict(chrom='1', pos=200, ref='A', alt='C',
                          consequence='missense_variant')],
            'TEST2': [dict(chrom='1', pos=300, ref='A', alt='C',
                           consequence='missense_variant')],
        }

        observed = open_mutations(handle.name)
        self.assertEqual(observed, expected)
def main():
    ''' analyse each gene with de novo mutations and write its p-value

    Reads the command line options, loads the resources needed for the
    analysis, then writes one tab-separated "symbol, severity_p_value" line
    per gene symbol to the output path given in the options.
    '''
    args = get_options()

    ensembl = EnsemblRequest(args.cache, args.genome_build)
    cadd = pysam.TabixFile(args.cadd)

    constraint = load_regional_constraint(args.constraint)

    # open de novo mutations
    all_de_novos = open_mutations(args.de_novos)

    mut_dict = load_mutation_rates()

    # the context manager ensures the results are flushed and the handle is
    # closed, even if the analysis of one of the genes raises an exception
    with open(args.output, 'w') as output:
        output.write('symbol\tseverity_p_value\n')
        for symbol in all_de_novos:
            # skip variants which lack a usable gene symbol
            if symbol in ['', '.']:
                continue

            # show progress, since each gene can take a while
            print(symbol)
            de_novos = all_de_novos[symbol]
            p_value = analyse_gene(ensembl, mut_dict, cadd, symbol, de_novos,
                                   constraint, WEIGHTS)
            line = '{}\t{}\n'.format(symbol, p_value)
            output.write(line)
    def test_open_mutations_consequence(self):
        ''' check that variants are filtered by their consequence
        '''

        header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
        synonymous = ['TEST', '1', '200', 'A', 'C', 'synonymous_variant']
        handle = self.write_temp([header, synonymous])

        # a variant which does not alter the protein sequence is excluded,
        # so nothing should be loaded from this file
        loaded = open_mutations(handle.name)
        self.assertEqual(loaded, {})
    def test_open_mutations_indels(self):
        ''' check that indels are only loaded when explicitly requested
        '''

        header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
        handle = self.write_temp([
            header,
            ['TEST', '1', '200', 'A', 'C', 'missense_variant'],
            ['TEST2', '1', '300', 'A', 'CG', 'frameshift_variant'],
        ])

        # the records we expect for the single base change and for the indel
        missense = dict(chrom='1', pos=200, ref='A', alt='C',
                        consequence='missense_variant')
        frameshift = dict(chrom='1', pos=300, ref='A', alt='CG',
                          consequence='frameshift_variant')

        # by default the indel is dropped
        without_indels = open_mutations(handle.name)
        self.assertEqual(without_indels, {'TEST': [missense]})

        # with indels=True, the indel is retained alongside the missense
        with_indels = open_mutations(handle.name, indels=True)
        self.assertEqual(with_indels,
                         {'TEST': [missense], 'TEST2': [frameshift]})
    def test_open_mutations_column_order(self):
        ''' test that we can load files with different column orders

        Columns should be located via the header row, so neither a scrambled
        column order nor an additional unused column should alter the result.
        '''

        # the loaded data should be the same for both of the files below
        expected = {
            'TEST': [{
                'chrom': '1',
                'pos': 200,
                'ref': 'A',
                'alt': 'C',
                'consequence': 'missense_variant'
            }]
        }

        # scramble the column order
        lines = [['chrom', 'ref', 'alt', 'consequence', 'symbol', 'pos'],
                 ['1', 'A', 'C', 'missense_variant', 'TEST', '200']]

        temp = self.write_temp(lines)

        # check that we still pick up the correct columns based on the header
        self.assertEqual(open_mutations(temp.name), expected)

        # add an extra column, and check that the file is loaded as per usual
        lines = [[
            'chrom', 'ref', 'alt', 'consequence', 'symbol', 'pos', 'extra'
        ], ['1', 'A', 'C', 'missense_variant', 'TEST', '200', 'something']]

        # the new rows have to be written out, otherwise we would only
        # re-read the first file and never exercise the extra column
        temp = self.write_temp(lines)

        self.assertEqual(open_mutations(temp.name), expected)