def frequent_words(text, k):
    """Return the set of most frequent length-``k`` substrings of ``text``.

    Each distinct substring of length ``k`` is scored with
    ``pattern_count(text, pattern)`` (a helper defined outside this block) and
    the substrings that reach the highest score are returned.

    Args:
        text: String to scan.
        k: Length of the substrings to consider.

    Returns:
        A set of the length-``k`` substrings whose score equals the maximum.
        The set is empty when ``text`` has no substring of length ``k``.
    """
    counts = {}
    # A string of length N has N - k + 1 start positions for a k-mer; stopping
    # at N - k would silently drop the final k-mer.
    for i in range(len(text) - k + 1):
        pattern = text[i:i + k]
        # Score each distinct k-mer once instead of once per occurrence.
        if pattern not in counts:
            counts[pattern] = pattern_count(text, pattern)
    if not counts:
        return set()
    max_count = max(counts.values())
    return {pattern for pattern, n in counts.items() if n == max_count}
示例#2
0
def most_frequent(text, k):
    """Print every most frequent length-``k`` substring of ``text``.

    Each distinct substring of length ``k`` is scored with
    ``pattern_count(text, pattern)`` (a helper defined outside this block).
    Substrings that reach the highest score are printed one per line, in
    order of first appearance in ``text``. Nothing is printed when ``text``
    has no substring of length ``k``.

    Args:
        text: String to scan.
        k: Length of the substrings to consider.
    """
    hist = {}
    # len(text) - k + 1 start positions exist; the "+ 1" keeps the last k-mer.
    for i in range(len(text) - k + 1):
        pattern = text[i:i + k]
        # Score each distinct k-mer once instead of once per occurrence.
        if pattern not in hist:
            hist[pattern] = pattern_count(text, pattern)

    if not hist:
        return

    maximum = max(hist.values())
    for key, count in hist.items():
        if count == maximum:
            print(key)
def frequent_words(text, k):
    """Return the set of most frequent k-mers of length ``k`` found in ``text``.

    Each distinct substring of length ``k`` is scored with
    ``pattern_count.pattern_count(text, pattern)`` (a module imported outside
    this block) and the substrings that reach the highest score are returned.

    Args:
        text: String to scan.
        k: Length of the substrings to consider.

    Returns:
        A set of the length-``k`` substrings whose score equals the maximum.
        The set is empty when ``text`` has no substring of length ``k``.
    """
    counts = {}
    # len(text) - k + 1 start positions exist; the "+ 1" keeps the last k-mer.
    for i in range(len(text) - k + 1):
        pattern = text[i:i + k]
        # Score each distinct k-mer once instead of once per occurrence.
        if pattern not in counts:
            counts[pattern] = pattern_count.pattern_count(text, pattern)
    if not counts:
        return set()
    max_count = max(counts.values())
    return {pattern for pattern, n in counts.items() if n == max_count}
示例#4
0
def frequent_words(genome: str, k: int):
    """Return the set of length-``k`` substrings of ``genome`` with the top score.

    Every start position is scored with ``pattern_count(genome, pattern)`` (a
    helper defined outside this block). The per-position scores are printed
    with the label ``count`` before the maximum is taken.

    Args:
        genome: String to scan.
        k: Length of the substrings to consider.

    Returns:
        A set of the substrings whose score equals the highest score seen.
    """
    window_total: int = len(genome) - k + 1
    kmers = [genome[start:start + k] for start in range(window_total)]
    occurrences: list = [pattern_count(genome, kmer) for kmer in kmers]
    print("count", occurrences)
    highest = max(occurrences)
    return {kmer for kmer, seen in zip(kmers, occurrences) if seen == highest}
def cluster_finding(k, t, l, genome):
    """Return the k-mers that occur at least ``t`` times in some length-``l`` window.

    Occurrences are counted with ``pattern_count.pattern_count(window, pattern)``
    (a module imported outside this block).

    Args:
        k: Length of the k-mers to look for.
        t: Minimum number of occurrences required inside one window.
        l: Length of the sliding window.
        genome: String to scan.

    Returns:
        A set of the qualifying k-mers. It is empty when ``genome`` is shorter
        than ``l``, because no full window exists.
    """
    clusters = set()
    genome_length = len(genome)
    if genome_length < l:
        return clusters

    # If a k-mer has t occurrences inside any window, they also fit inside the
    # window that starts at the first of them. So it is enough to test each
    # k-mer against the window starting at its own position, but this must be
    # done for EVERY k-mer start. Stopping at the last full-window start misses
    # clumps whose first occurrence lies in the final l - 1 characters.
    for i in range(genome_length - k + 1):
        pattern = genome[i:i + k]
        if pattern in clusters:
            continue
        # Near the end the slice is shorter than l; it is still contained in
        # the last full window, so a hit there is a genuine clump.
        window = genome[i:i + l]
        if pattern_count.pattern_count(window, pattern) >= t:
            clusters.add(pattern)
    return clusters
示例#6
0
def frequent_words(text, pattern_length):
    """Print and return the most frequent substrings of a given length.

    Each distinct substring of length ``pattern_length`` is scored with
    ``pattern_count(text, pattern)`` (a helper defined outside this block).

    Args:
        text: String to scan.
        pattern_length: Length of the substrings to consider.

    Returns:
        The top-scoring substrings joined by single spaces, in order of first
        appearance in ``text``. The same string is also printed. It is empty
        when ``text`` has no substring of the requested length.
    """
    kmer_patterns = {}
    # len(text) - pattern_length + 1 start positions exist; the "+ 1" keeps
    # the final substring.
    for i in range(len(text) - pattern_length + 1):
        pattern = text[i:i + pattern_length]
        # Store the score once per distinct substring. Adding 1 on every
        # repeat sighting on top of the full count inflated the stored values.
        if pattern not in kmer_patterns:
            kmer_patterns[pattern] = pattern_count(text, pattern)

    # default= avoids a ValueError when no substring of this length exists.
    max_count = max(kmer_patterns.values(), default=0)
    max_patterns = [p for p, n in kmer_patterns.items() if n == max_count]

    result = " ".join(max_patterns)
    print(result)
    return result
示例#7
0
def pattern_frequencies(text, patterns):
    """Pair each pattern with the result of ``pattern_count(text, pattern)``.

    Args:
        text: String passed as the first argument to ``pattern_count``.
        patterns: Iterable of patterns to score, processed in iteration order.

    Returns:
        A list of ``(pattern, count)`` tuples, one per item of ``patterns``.
    """
    frequencies = []
    for candidate in patterns:
        frequencies.append((candidate, pattern_count(text, candidate)))
    return frequencies