Exemplo n.º 1
0
    def compare_kmers(self):
        '''
        Find sample-only kmers, write them to file and start contig assembly.

        Steps performed:
          1. Load kmer tables (utils.run_jellyfish + utils.load_kmers) for
             every file in self.files['target_ref_fn'], for
             self.files['sv_cleaned_fq'] and for
             self.files['sv_sc_unmapped_fa'].
          2. Keep the kmers present in both sample tables but absent from the
             reference table; when 'normal_bam_file' is in self.params.opts,
             also drop the kmers found in self.files['norm_cleaned_fq'].
          3. Write each kept kmer and its value from the 'case' table as a
             tab-separated line to
             <self.paths['kmers']>/<self.name>_sample_kmers.out.
          4. Pass the kept kmers to assembler.init_assembly(), store the
             result in self.contigs, reset the intermediate kmer tables and
             self.cleaned_read_recs, then call self.finalize_contigs().
        '''
        jellyfish = self.params.get_param('jellyfish')
        kmer_size = int(self.params.get_param('kmer_size'))

        def index_kmers(seq_fn, kmers):
            # Run jellyfish on seq_fn and hand its result, together with the
            # existing table, to utils.load_kmers.
            return utils.load_kmers(utils.run_jellyfish(seq_fn, jellyfish, kmer_size), kmers)

        self.kmers['ref'] = {}
        for ref_fn in self.files['target_ref_fn']:
            utils.log(self.logging_name, 'info', 'Indexing kmers for reference sequence %s' % ref_fn)
            self.kmers['ref'] = index_kmers(ref_fn, self.kmers['ref'])

        # NOTE: files listed under self.files['target_altref_fn'] are not
        # indexed here; the code that used to do so was disabled.

        utils.log(self.logging_name, 'info', 'Indexing kmers for sample sequence %s' % self.files['sv_cleaned_fq'])
        self.kmers['case'] = index_kmers(self.files['sv_cleaned_fq'], {})
        self.kmers['case_sc'] = index_kmers(self.files['sv_sc_unmapped_fa'], {})

        # Kmers seen in both sample tables, minus everything in the reference.
        # set.difference() accepts the mapping directly, so no throwaway set
        # of reference kmers has to be built.
        sc_mers = set(self.kmers['case']) & set(self.kmers['case_sc'])
        sample_only_mers = sc_mers.difference(self.kmers['ref'])

        if 'normal_bam_file' in self.params.opts:
            norm_kmers = index_kmers(self.files['norm_cleaned_fq'], {})
            sample_only_mers = sample_only_mers.difference(norm_kmers)

        # Write case only kmers out to file. The context manager closes the
        # handle even if a lookup or write raises. Set iteration order is
        # arbitrary, so the line order in the file is unspecified.
        self.files['sample_kmers'] = os.path.join(self.paths['kmers'], self.name + "_sample_kmers.out")
        case_kmers = self.kmers['case']
        case_only = {}
        with open(self.files['sample_kmers'], 'w') as sample_kmer_fout:
            for mer in sample_only_mers:
                count = case_kmers[mer]
                sample_kmer_fout.write("\t".join([str(mer), str(count)]) + "\n")
                case_only[mer] = count
        self.kmers['case_only'] = case_only

        # Drop the large intermediate tables before assembly.
        self.kmers['ref'] = {}
        self.kmers['case'] = {}
        self.kmers['case_sc'] = {}

        utils.log(self.logging_name, 'info', 'Writing %d sample-only kmers to file %s' % (len(self.kmers['case_only']), self.files['sample_kmers']))
        # Only the cluster file path is recorded and logged here; this method
        # itself does not write that file.
        self.files['kmer_clusters'] = os.path.join(self.paths['kmers'], self.name + "_sample_kmers_merged.out")
        utils.log(self.logging_name, 'info', 'Writing kmer clusters to file %s' % self.files['kmer_clusters'])

        self.contigs = assembler.init_assembly(
            self.kmers['case_only'],
            self.cleaned_read_recs['sv'],
            kmer_size,
            int(self.params.get_param('trl_sr_thresh')),
            self.params.get_param('read_len'),
        )
        # Release the reads and kmers now that assembly has consumed them.
        self.cleaned_read_recs = None
        self.kmers['case_only'] = {}
        self.finalize_contigs()
Exemplo n.º 2
0
    def compare_kmers(self):
        '''
        Find sample-only kmers, write them to file and start contig assembly.

        Steps performed:
          1. Load kmer tables (utils.run_jellyfish + utils.load_kmers) for
             every file in self.files['target_ref_fn'], for
             self.files['sv_cleaned_fq'] and for
             self.files['sv_sc_unmapped_fa'].
          2. Keep the kmers present in both sample tables but absent from the
             reference table; when 'normal_bam_file' is in self.params.opts,
             also drop the kmers found in self.files['norm_cleaned_fq'].
          3. Write each kept kmer and its value from the 'case' table as a
             tab-separated line to
             <self.paths['kmers']>/<self.name>_sample_kmers.out.
          4. Pass the kept kmers to assembler.init_assembly(), store the
             result in self.contigs, reset the intermediate kmer tables and
             self.cleaned_read_recs, then call self.finalize_contigs().
        '''
        jellyfish = self.params.get_param('jellyfish')
        kmer_size = int(self.params.get_param('kmer_size'))

        def index_kmers(seq_fn, kmers):
            # Run jellyfish on seq_fn and hand its result, together with the
            # existing table, to utils.load_kmers.
            return utils.load_kmers(utils.run_jellyfish(seq_fn, jellyfish, kmer_size), kmers)

        self.kmers['ref'] = {}
        for ref_fn in self.files['target_ref_fn']:
            utils.log(self.logging_name, 'info', 'Indexing kmers for reference sequence %s' % ref_fn)
            self.kmers['ref'] = index_kmers(ref_fn, self.kmers['ref'])

        # NOTE: files listed under self.files['target_altref_fn'] are not
        # indexed here; the code that used to do so was disabled.

        utils.log(self.logging_name, 'info', 'Indexing kmers for sample sequence %s' % self.files['sv_cleaned_fq'])
        self.kmers['case'] = index_kmers(self.files['sv_cleaned_fq'], {})
        self.kmers['case_sc'] = index_kmers(self.files['sv_sc_unmapped_fa'], {})

        # Kmers seen in both sample tables, minus everything in the reference.
        # set.difference() accepts the mapping directly, so no throwaway set
        # of reference kmers has to be built.
        sc_mers = set(self.kmers['case']) & set(self.kmers['case_sc'])
        sample_only_mers = sc_mers.difference(self.kmers['ref'])

        if 'normal_bam_file' in self.params.opts:
            norm_kmers = index_kmers(self.files['norm_cleaned_fq'], {})
            sample_only_mers = sample_only_mers.difference(norm_kmers)

        # Write case only kmers out to file. The context manager closes the
        # handle even if a lookup or write raises. Set iteration order is
        # arbitrary, so the line order in the file is unspecified.
        self.files['sample_kmers'] = os.path.join(self.paths['kmers'], self.name + "_sample_kmers.out")
        case_kmers = self.kmers['case']
        case_only = {}
        with open(self.files['sample_kmers'], 'w') as sample_kmer_fout:
            for mer in sample_only_mers:
                count = case_kmers[mer]
                sample_kmer_fout.write("\t".join([str(mer), str(count)]) + "\n")
                case_only[mer] = count
        self.kmers['case_only'] = case_only

        # Drop the large intermediate tables before assembly.
        self.kmers['ref'] = {}
        self.kmers['case'] = {}
        self.kmers['case_sc'] = {}

        utils.log(self.logging_name, 'info', 'Writing %d sample-only kmers to file %s' % (len(self.kmers['case_only']), self.files['sample_kmers']))
        # Only the cluster file path is recorded and logged here; this method
        # itself does not write that file.
        self.files['kmer_clusters'] = os.path.join(self.paths['kmers'], self.name + "_sample_kmers_merged.out")
        utils.log(self.logging_name, 'info', 'Writing kmer clusters to file %s' % self.files['kmer_clusters'])

        self.contigs = assembler.init_assembly(
            self.kmers['case_only'],
            self.cleaned_read_recs['sv'],
            kmer_size,
            int(self.params.get_param('trl_sr_thresh')),
            self.params.get_param('read_len'),
        )
        # Release the reads and kmers now that assembly has consumed them.
        self.cleaned_read_recs = None
        self.kmers['case_only'] = {}
        self.finalize_contigs()
Exemplo n.º 3
0
    def compare_kmers(self, kmerPath, name, readLen, targetRefFns):
        """
        Find sample-only kmers, write them to file and initialise assembly.

        Args:
            kmerPath:     Directory in which the kmer output file paths are
                          built.
            name:         Prefix used for the output file names.
            readLen:      Forwarded unchanged to assembly.init_assembly().
            targetRefFns: Forwarded unchanged to self.set_reference_kmers().

        Side effects:
            - Writes "<kmer>\t<count>" lines to
              <kmerPath>/<name>_sample_kmers.out.
            - Sets self.files['sample_kmers'] and self.files['kmer_clusters'].
            - Stores the result of assembly.init_assembly() in
              self.kmers['clusters'].
            - Resets the 'ref', 'case', 'case_sc' and 'case_only' kmer tables
              and calls self.clear_cleaned_reads().
        """

        # Set the reference sequence kmers.
        self.set_reference_kmers(targetRefFns)

        # Set sample kmers.
        self.set_sample_kmers()

        # Keep the kmers found in both the cleaned sample sequences and the
        # unmapped/softclipped sequences ...
        scKmers = set(self.kmers['case']) & set(self.kmers['case_sc'])
        # ... and take the difference from the reference kmers. Passing the
        # table itself avoids building a throwaway set of reference kmers.
        sampleOnlyKmers = scKmers.difference(self.kmers['ref'])

        # Remove normal sample kmers if available.
        if self.params.get_param('normal_bam_file'):
            normKmers = {}
            # The return value of get_kmers is not used; it is expected to
            # fill normKmers in place.
            self.get_kmers(self.files['norm_cleaned_fq'], normKmers)
            sampleOnlyKmers = sampleOnlyKmers.difference(normKmers)

        # Write case only kmers out to file. The context manager closes the
        # handle even if a lookup or write raises. Set iteration order is
        # arbitrary, so the line order in the file is unspecified.
        self.files['sample_kmers'] = os.path.join(kmerPath,
                                                  name + "_sample_kmers.out")
        caseKmers = self.kmers['case']
        caseOnly = {}
        with open(self.files['sample_kmers'], 'w') as sample_kmer_fout:
            for mer in sampleOnlyKmers:
                count = caseKmers[mer]
                sample_kmer_fout.write(
                    "\t".join([str(mer), str(count)]) + "\n")
                caseOnly[mer] = count
        self.kmers['case_only'] = caseOnly

        # Clean out data structures.
        self.kmers['ref'] = {}
        self.kmers['case'] = {}
        self.kmers['case_sc'] = {}

        utils.log(
            self.loggingName, 'info',
            'Writing %d sample-only kmers to file %s' %
            (len(self.kmers['case_only']), self.files['sample_kmers']))
        # Only the cluster file path is recorded and logged here; this method
        # itself does not write that file.
        self.files['kmer_clusters'] = os.path.join(
            kmerPath, name + "_sample_kmers_merged.out")
        utils.log(
            self.loggingName, 'info',
            'Writing kmer clusters to file %s' % self.files['kmer_clusters'])

        self.kmers['clusters'] = assembly.init_assembly(
            self.kmers['case_only'], self.cleaned_read_recs['sv'],
            self.params.get_kmer_size(), self.params.get_sr_thresh('min'),
            readLen)
        # Release the reads and kmers now that assembly has consumed them.
        self.clear_cleaned_reads()
        self.kmers['case_only'] = {}
Exemplo n.º 4
0
    def compare_kmers(self, kmerPath, name, readLen, targetRefFns):
        """
        Find sample-only kmers, write them to file and initialise assembly.

        Args:
            kmerPath:     Directory in which the kmer output file paths are built.
            name:         Prefix used for the output file names.
            readLen:      Forwarded unchanged to assembly.init_assembly().
            targetRefFns: Forwarded unchanged to self.set_reference_kmers().

        Side effects:
            - Writes "<kmer>\t<count>" lines to <kmerPath>/<name>_sample_kmers.out.
            - Sets self.files['sample_kmers'] and self.files['kmer_clusters'].
            - Stores the result of assembly.init_assembly() in self.kmers['clusters'].
            - Resets the 'ref', 'case', 'case_sc' and 'case_only' kmer tables and
              calls self.clear_cleaned_reads().
        """

        # Set the reference sequence kmers.
        self.set_reference_kmers(targetRefFns)

        # Set sample kmers.
        self.set_sample_kmers()

        # Keep the kmers found in both the cleaned sample sequences and the unmapped/softclipped sequences ...
        scKmers = set(self.kmers['case']) & set(self.kmers['case_sc'])
        # ... and take the difference from the reference kmers. Passing the table itself avoids
        # building a throwaway set of reference kmers.
        sampleOnlyKmers = scKmers.difference(self.kmers['ref'])

        # Remove normal sample kmers if available.
        if self.params.get_param('normal_bam_file'):
            normKmers = {}
            # The return value of get_kmers is not used; it is expected to fill normKmers in place.
            self.get_kmers(self.files['norm_cleaned_fq'], normKmers)
            sampleOnlyKmers = sampleOnlyKmers.difference(normKmers)

        # Write case only kmers out to file. The context manager closes the handle even if a
        # lookup or write raises. Set iteration order is arbitrary, so line order is unspecified.
        self.files['sample_kmers'] = os.path.join(kmerPath, name + "_sample_kmers.out")
        caseKmers = self.kmers['case']
        caseOnly = {}
        with open(self.files['sample_kmers'], 'w') as sample_kmer_fout:
            for mer in sampleOnlyKmers:
                count = caseKmers[mer]
                sample_kmer_fout.write("\t".join([str(mer), str(count)]) + "\n")
                caseOnly[mer] = count
        self.kmers['case_only'] = caseOnly

        # Clean out data structures.
        self.kmers['ref'] = {}
        self.kmers['case'] = {}
        self.kmers['case_sc'] = {}

        utils.log(self.loggingName, 'info', 'Writing %d sample-only kmers to file %s' % (len(self.kmers['case_only']), self.files['sample_kmers']))
        # Only the cluster file path is recorded and logged here; this method itself does not write that file.
        self.files['kmer_clusters'] = os.path.join(kmerPath, name + "_sample_kmers_merged.out")
        utils.log(self.loggingName, 'info', 'Writing kmer clusters to file %s' % self.files['kmer_clusters'])

        self.kmers['clusters'] = assembly.init_assembly(self.kmers['case_only'], self.cleaned_read_recs['sv'], self.params.get_kmer_size(), self.params.get_sr_thresh('min'), readLen)
        # Release the reads and kmers now that assembly has consumed them.
        self.clear_cleaned_reads()
        self.kmers['case_only'] = {}