def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None,
             ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10,
             seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None,
             contigs_to_check=None):
    '''Set up a seed from a given sequence, or from the most common kmer in the reads.

    If seq is given it is used as the seed unchanged. Otherwise
    kcount.get_most_common_kmers is asked for the single most common kmer
    (most_common=1) of reads1/reads2 and that kmer becomes the seed. When
    that call does not return exactly one kmer, self.seq is set to None.

    Parameters stored on the object without being used here:
    extend_length, ext_min_cov, ext_min_ratio.

    Parameters forwarded to kcount.get_most_common_kmers:
        seed_length         -> kmer_length
        seed_min_count      -> min_count
        seed_max_count      -> max_count
        sequences_to_ignore -> ignore_seqs (a new empty set when None)
        contigs_to_check    -> contigs_to_check (a new empty dict when None)
        verbose, kmc_threads, map_threads (also stored on the object)

    overlap_length: stored as given when a seed exists; defaults to the
    length of the seed when None. It is always None when there is no seed,
    whatever value was passed in.

    Raises Error when both seq and reads1 are None.
    '''
    # Fresh containers per call, so no mutable object is shared between instances.
    if contigs_to_check is None:
        contigs_to_check = {}
    if sequences_to_ignore is None:
        sequences_to_ignore = set()

    self.verbose = verbose
    self.kmc_threads = kmc_threads
    self.map_threads = map_threads
    self.extend_length = extend_length
    self.ext_min_cov = ext_min_cov
    self.ext_min_ratio = ext_min_ratio
    self.seed_lengths = []

    if seq is not None:
        self.seq = seq
    else:
        if reads1 is None:
            raise Error('Cannot construct Seed object. Need reads when no seq has been given')

        kmer_counts = kcount.get_most_common_kmers(
            reads1,
            reads2,
            most_common=1,
            min_count=seed_min_count,
            max_count=seed_max_count,
            kmer_length=seed_length,
            verbose=self.verbose,
            ignore_seqs=sequences_to_ignore,
            contigs_to_check=contigs_to_check,
            kmc_threads=self.kmc_threads,
            map_threads=self.map_threads,
        )

        if len(kmer_counts) == 1:
            # Exactly one entry: take the (kmer, count) pair directly rather
            # than building separate key and value lists.
            self.seq, kmer_coverage = next(iter(kmer_counts.items()))
            if self.verbose:
                print('Made new seed. kmer coverage', kmer_coverage, 'and seed is', self.seq, flush=True)
        else:
            self.seq = None

    # overlap_length is decided once, here, after it is known whether a seed exists.
    if self.seq is None:
        self.overlap_length = None
    elif overlap_length is None:
        self.overlap_length = len(self.seq)
    else:
        self.overlap_length = overlap_length
def test_get_most_common_kmers(self):
    '''Check get_most_common_kmers on the paired test read files, using the kmc method'''
    fwd_reads = os.path.join(data_dir, 'kcount_test.get_commonest_kmer_1.fa')
    rev_reads = os.path.join(data_dir, 'kcount_test.get_commonest_kmer_2.fa')

    # Counting options: 10-mers, called with min_count=2 and max_count=4.
    options = dict(
        kmer_length=10,
        head=100000,
        min_count=2,
        max_count=4,
        most_common=100,
        method='kmc',
    )
    got = kcount.get_most_common_kmers(fwd_reads, rev_reads, **options)

    expected = {'AGCTAAAACT': 2, 'CTATATCTCA': 3}
    self.assertDictEqual(expected, got)
def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None,
             ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10,
             seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None,
             contigs_to_check=None):
    '''Set up a seed from a given sequence, or from the most common kmer in the reads.

    If seq is given it is used as the seed unchanged. Otherwise
    kcount.get_most_common_kmers is asked for the single most common kmer
    (most_common=1) of reads1/reads2 and that kmer becomes the seed. When
    that call does not return exactly one kmer, self.seq is set to None.

    Parameters stored on the object without being used here:
    extend_length, ext_min_cov, ext_min_ratio.

    Parameters forwarded to kcount.get_most_common_kmers:
        seed_length         -> kmer_length
        seed_min_count      -> min_count
        seed_max_count      -> max_count
        sequences_to_ignore -> ignore_seqs (a new empty set when None)
        contigs_to_check    -> contigs_to_check (a new empty dict when None)
        verbose, kmc_threads, map_threads (also stored on the object)

    overlap_length: stored as given when a seed exists; defaults to the
    length of the seed when None. It is always None when there is no seed,
    whatever value was passed in.

    Raises Error when both seq and reads1 are None.
    '''
    # Fresh containers per call, so no mutable object is shared between instances.
    if contigs_to_check is None:
        contigs_to_check = {}
    if sequences_to_ignore is None:
        sequences_to_ignore = set()

    self.verbose = verbose
    self.kmc_threads = kmc_threads
    self.map_threads = map_threads
    self.extend_length = extend_length
    self.ext_min_cov = ext_min_cov
    self.ext_min_ratio = ext_min_ratio
    self.seed_lengths = []

    if seq is not None:
        self.seq = seq
    else:
        if reads1 is None:
            raise Error('Cannot construct Seed object. Need reads when no seq has been given')

        kmer_counts = kcount.get_most_common_kmers(
            reads1,
            reads2,
            most_common=1,
            min_count=seed_min_count,
            max_count=seed_max_count,
            kmer_length=seed_length,
            verbose=self.verbose,
            ignore_seqs=sequences_to_ignore,
            contigs_to_check=contigs_to_check,
            kmc_threads=self.kmc_threads,
            map_threads=self.map_threads,
        )

        if len(kmer_counts) == 1:
            # Exactly one entry: take the (kmer, count) pair directly rather
            # than building separate key and value lists.
            self.seq, kmer_coverage = next(iter(kmer_counts.items()))
            if self.verbose:
                print('Made new seed. kmer coverage', kmer_coverage, 'and seed is', self.seq, flush=True)
        else:
            self.seq = None

    # overlap_length is decided once, here, after it is known whether a seed exists.
    if self.seq is None:
        self.overlap_length = None
    elif overlap_length is None:
        self.overlap_length = len(self.seq)
    else:
        self.overlap_length = overlap_length