示例#1
0
def find_transcripts(ensembl, mut_dict, output, args):
    """Write a tab-separated table of de novo counts per transcript.

    For each gene symbol returned by ``load_de_novos(args.de_novos)`` the
    missense and nonsense events are pooled, counted per transcript and
    written to ``output`` as one row per transcript.

    Args:
        ensembl: object handed through unchanged to the counting helpers.
        mut_dict: not used by this function; kept so existing callers that
            pass it positionally continue to work.
        output: writable text handle; receives the header line followed by
            the per-transcript rows.
        args: options object providing ``de_novos`` (passed to
            ``load_de_novos``), ``all_transcripts`` and
            ``minimal_transcripts`` (which select the counting mode).

    Genes for which the counting helper raises ``ValueError`` or
    ``IndexError`` are reported on stdout and skipped.
    """

    de_novos = load_de_novos(args.de_novos)

    output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

    for symbol in sorted(de_novos):
        print(symbol)
        func_events = de_novos[symbol]["missense"] + de_novos[symbol][
            "nonsense"]

        # Reset the result for every gene. Without this, running with neither
        # mode flag set would raise UnboundLocalError on the first gene, and
        # on later genes would write the previous gene's counts under the
        # current symbol.
        counts = {}

        # find the counts per transcript, depending on whether we want to count
        # for all transcripts containing one or more de novos, or to find the
        # minimum set of transcripts to contain the de novos
        try:
            if args.all_transcripts:
                counts = count_de_novos_per_transcript(ensembl, symbol,
                                                       func_events)
            elif args.minimal_transcripts:
                counts = minimise_transcripts(ensembl, symbol, func_events)
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            continue

        # write the transcript details to a file; each entry is read through
        # its "len" and "n" keys
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                                             counts[key]["n"])
            output.write(line)
示例#2
0
def find_transcripts(ensembl, mut_dict, output, args):
    """ write the number of de novos per transcript for every gene

    The genes come from load_de_novos(args.de_novos). For each gene the
    missense and nonsense events are combined, counted per transcript, and
    written to the output as tab-separated rows underneath a header line.

    Args:
        ensembl: passed straight through to the counting functions
        mut_dict: unused here, retained so the call signature is unchanged
        output: writable text handle that receives the table
        args: options with "de_novos", "all_transcripts" and
            "minimal_transcripts" attributes

    Genes where counting raises ValueError or IndexError are printed to
    stdout and left out of the table.
    """

    de_novos = load_de_novos(args.de_novos)

    output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

    for symbol in sorted(de_novos):
        print(symbol)
        func_events = de_novos[symbol]["missense"] + de_novos[symbol]["nonsense"]

        # find the counts per transcript, depending on whether we want to count
        # for all transcripts containing one or more de novos, or to find the
        # minimum set of transcripts to contain the de novos
        try:
            if args.all_transcripts:
                counts = count_de_novos_per_transcript(ensembl, symbol, func_events)
            elif args.minimal_transcripts:
                counts = minimise_transcripts(ensembl, symbol, func_events)
            else:
                # neither mode was requested: write nothing for this gene,
                # rather than hitting an unbound name on the first gene or
                # reusing the previous gene's counts on later ones
                counts = {}
        except (ValueError, IndexError):
            print("error occured with {0}".format(symbol))
            continue

        # write the transcript details to a file
        for key in counts:
            line = "{}\t{}\t{}\t{}\n".format(symbol, key, counts[key]["len"],
                counts[key]["n"])
            output.write(line)
示例#3
0
 def test_minimise_transcripts(self):
     """ check minimise_transcripts() on a known gene and on empty inputs
     """

     gene = "DYNLL1"
     positions = [120934226, 120936012]

     # every transcript listed here is expected to be reported with the same
     # length and with both sites counted
     transcript_ids = [
         'ENST00000242577',
         'ENST00000392508',
         'ENST00000392509',
         'ENST00000549989',
         'ENST00000548342',
     ]
     expected = {tx_id: {'len': 89, 'n': 2} for tx_id in transcript_ids}

     observed = minimise_transcripts(self.ensembl, gene, positions)
     self.assertEqual(observed, expected)

     # without any de novos the result is an empty dict
     self.assertEqual(minimise_transcripts(self.ensembl, gene, []), {})

     # when none of the de novos are in a transcript the result is also an
     # empty dict
     self.assertEqual(minimise_transcripts(self.ensembl, gene, [100]), {})
示例#4
0
 def test_minimise_transcripts(self):
     """ minimise_transcripts() gives the expected per-transcript counts
     """

     symbol = "DYNLL1"

     # a single gene with two sites: each expected transcript has the same
     # length and holds both sites
     per_transcript = {'len': 89, 'n': 2}
     expected = {
         'ENST00000242577': dict(per_transcript),
         'ENST00000392508': dict(per_transcript),
         'ENST00000392509': dict(per_transcript),
         'ENST00000549989': dict(per_transcript),
         'ENST00000548342': dict(per_transcript),
     }
     result = minimise_transcripts(self.ensembl, symbol,
         [120934226, 120936012])
     self.assertEqual(result, expected)

     # an empty dict is expected both when there are no de novos at all, and
     # when none of the de novos are in a transcript
     for unplaced_sites in ([], [100]):
         result = minimise_transcripts(self.ensembl, symbol, unplaced_sites)
         self.assertEqual(result, {})
示例#5
0
def main():
    """ count de novos per transcript for each gene and write them to a file

    Options come from get_options(), which supplies the input path, output
    path, an optional file of deprecated gene IDs, the cache directory, the
    genome build, and the two flags that select the counting mode. The
    output is a tab-separated table with one row per gene and transcript.

    Genes for which counting raises ValueError or IndexError are printed to
    stdout and skipped.
    """

    input_file, output_file, old_gene_id_file, cache_dir, genome_build, \
        all_transcripts, minimal_transcripts = get_options()

    # load all the data
    ensembl = EnsemblRequest(cache_dir, genome_build)

    old_gene_ids = {}
    if old_gene_id_file is not None:
        old_gene_ids = get_deprecated_gene_ids(old_gene_id_file)

    known_de_novos = load_de_novos(input_file, exclude_indels=False)

    # the context manager closes the output file even if an unexpected
    # exception escapes the loop
    with open(output_file, "w") as output:
        output.write("hgnc_symbol\ttranscript_id\tlength\tde_novos\n")

        for gene_id in sorted(known_de_novos):
            de_novos = known_de_novos[gene_id]
            func_events = de_novos["missense"] + de_novos["nonsense"]

            # fix HGNC IDs that have been discontinued in favour of other
            # gene IDs
            if gene_id in old_gene_ids:
                gene_id = old_gene_ids[gene_id]

            # reset the result for every gene, so that running with neither
            # mode flag set cannot hit an unbound name or reuse the counts
            # from the previous gene
            counts = {}

            # find the counts per transcript, depending on whether we want to
            # count for all transcripts containing one or more de novos, or to
            # find the minimum set of transcripts to contain the de novos
            try:
                if all_transcripts:
                    counts = count_de_novos_per_transcript(ensembl, gene_id, func_events)
                elif minimal_transcripts:
                    counts = minimise_transcripts(ensembl, gene_id, func_events)
            except (ValueError, IndexError):
                print("error occured with {0}".format(gene_id))
                continue

            # write the transcript details to a file
            for key in counts:
                line = "{0}\t{1}\t{2}\t{3}\n".format(gene_id, key, counts[key]["len"], counts[key]["n"])
                output.write(line)