Exemplo n.º 1
0
Arquivo: seed.py Projeto: satta/iva
    def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None, contigs_to_check=None):
        '''Build a seed from a supplied sequence or from the most common kmer.

        If seq is given it is stored unchanged. Otherwise reads1 must be
        given, and kcount.get_most_common_kmers is asked for a single kmer
        (most_common=1). When exactly one kmer is returned it becomes the
        seed; any other result leaves self.seq as None.

        self.overlap_length ends up as the overlap_length argument, or the
        length of the seed when that argument is None, or None when there is
        no seed at all.

        Raises Error when both seq and reads1 are None.
        '''
        # None stands in for "empty container" so that no mutable default is
        # shared between calls.
        ignored = set() if sequences_to_ignore is None else sequences_to_ignore
        to_check = {} if contigs_to_check is None else contigs_to_check

        self.verbose = verbose
        self.kmc_threads = kmc_threads
        self.map_threads = map_threads
        self.extend_length = extend_length
        self.ext_min_cov = ext_min_cov
        self.ext_min_ratio = ext_min_ratio
        self.seed_lengths = []
        self.overlap_length = overlap_length

        if seq is not None:
            self.seq = seq
        elif reads1 is None:
            raise Error('Cannot construct Seed object. Need reads when no seq has been given')
        else:
            kmer_counts = kcount.get_most_common_kmers(
                reads1,
                reads2,
                most_common=1,
                min_count=seed_min_count,
                max_count=seed_max_count,
                kmer_length=seed_length,
                verbose=self.verbose,
                ignore_seqs=ignored,
                contigs_to_check=to_check,
                kmc_threads=self.kmc_threads,
                map_threads=self.map_threads,
            )
            if len(kmer_counts) != 1:
                # Anything other than exactly one kmer means no seed
                self.seq = None
            else:
                self.seq = next(iter(kmer_counts.keys()))
                if self.verbose:
                    coverage = next(iter(kmer_counts.values()))
                    print('Made new seed. kmer coverage', coverage, 'and seed is', self.seq, flush=True)

        # The overlap length only makes sense when a seed exists
        if self.seq is None:
            self.overlap_length = None
        elif overlap_length is None:
            self.overlap_length = len(self.seq)
        else:
            self.overlap_length = overlap_length
Exemplo n.º 2
0
 def test_get_most_common_kmers(self):
     '''Check the kmer counts returned by get_most_common_kmers for the two test read files'''
     first_reads, second_reads = (
         os.path.join(data_dir, filename)
         for filename in (
             'kcount_test.get_commonest_kmer_1.fa',
             'kcount_test.get_commonest_kmer_2.fa',
         )
     )
     expected = {'AGCTAAAACT': 2, 'CTATATCTCA': 3}
     # Keyword options for the call, gathered in one place
     options = {
         'kmer_length': 10,
         'head': 100000,
         'min_count': 2,
         'max_count': 4,
         'most_common': 100,
         'method': 'kmc',
     }
     got = kcount.get_most_common_kmers(first_reads, second_reads, **options)
     self.assertDictEqual(expected, got)
Exemplo n.º 3
0
    def __init__(self,
                 extend_length=50,
                 overlap_length=None,
                 reads1=None,
                 reads2=None,
                 seq=None,
                 ext_min_cov=5,
                 ext_min_ratio=2,
                 verbose=0,
                 seed_length=None,
                 seed_min_count=10,
                 seed_max_count=100000000,
                 kmc_threads=1,
                 map_threads=1,
                 sequences_to_ignore=None,
                 contigs_to_check=None):
        '''Create a seed, either from seq or from the commonest kmer in the reads.

        A non-None seq is used directly as the seed. With no seq, reads1 is
        mandatory: kcount.get_most_common_kmers is called with most_common=1
        and, if it returns exactly one kmer, that kmer is the seed. Otherwise
        self.seq is set to None.

        When a seed exists, self.overlap_length is overlap_length, falling
        back to len(self.seq) if overlap_length is None. Without a seed it is
        None.

        Raises Error if neither seq nor reads1 is provided.
        '''
        # Replace the None placeholders with fresh empty containers
        if sequences_to_ignore is None:
            sequences_to_ignore = set()
        if contigs_to_check is None:
            contigs_to_check = {}

        self.extend_length = extend_length
        self.overlap_length = overlap_length
        self.ext_min_cov = ext_min_cov
        self.ext_min_ratio = ext_min_ratio
        self.verbose = verbose
        self.kmc_threads = kmc_threads
        self.map_threads = map_threads
        self.seed_lengths = []

        if seq is not None:
            self.seq = seq
        elif reads1 is None:
            raise Error('Cannot construct Seed object. Need reads when no seq has been given')
        else:
            kmer_options = {
                'most_common': 1,
                'min_count': seed_min_count,
                'max_count': seed_max_count,
                'kmer_length': seed_length,
                'verbose': self.verbose,
                'ignore_seqs': sequences_to_ignore,
                'contigs_to_check': contigs_to_check,
                'kmc_threads': self.kmc_threads,
                'map_threads': self.map_threads,
            }
            kmer_counts = kcount.get_most_common_kmers(reads1, reads2, **kmer_options)
            found_single_kmer = len(kmer_counts) == 1
            if found_single_kmer:
                self.seq = next(iter(kmer_counts.keys()))
                if self.verbose:
                    print('Made new seed. kmer coverage', next(iter(kmer_counts.values())), 'and seed is', self.seq, flush=True)
            else:
                self.seq = None

        # Settle the overlap length now that the seed (or lack of one) is known
        if self.seq is None:
            self.overlap_length = None
        elif overlap_length is not None:
            self.overlap_length = overlap_length
        else:
            self.overlap_length = len(self.seq)
Exemplo n.º 4
0
 def test_get_most_common_kmers(self):
     '''Compare get_most_common_kmers output against the expected kmer counts'''
     input_names = (
         'kcount_test.get_commonest_kmer_1.fa',
         'kcount_test.get_commonest_kmer_2.fa',
     )
     reads_a, reads_b = [os.path.join(data_dir, name) for name in input_names]
     wanted = {'AGCTAAAACT': 2, 'CTATATCTCA': 3}
     observed = kcount.get_most_common_kmers(
         reads_a,
         reads_b,
         kmer_length=10,
         head=100000,
         min_count=2,
         max_count=4,
         most_common=100,
         method='kmc',
     )
     self.assertDictEqual(wanted, observed)