Exemplo n.º 1
0
def approximateMatches(Pattern, Text, d):
    """Find the approximate occurrences of Pattern in Text.

    Every window of Text that is as long as Pattern is compared to Pattern
    with hammingDistance; a window is kept when the distance is at most d.

    Args:
        Pattern: The string to search for.
        Text: The string to search in.
        d: Maximum number of mismatches tolerated.

    Returns:
        The result of listToString applied to the matching start indices,
        listed in increasing order.
    """
    width = len(Pattern)
    window_count = len(Text) - width + 1
    hits = [
        start
        for start in range(window_count)
        if hammingDistance(Pattern, Text[start:start + width]) <= d
    ]
    return listToString(hits)
Exemplo n.º 2
0
def findClumps(Text, k, L, t):
    """Collect the distinct k-mers that form (L, t)-clumps in Text.

    A k-mer forms an (L, t)-clump when some window of length L in Text
    contains it at least t times, as counted by frequencyTable.

    Args:
        Text: The genome string to scan.
        k: Length of the k-mers.
        L: Length of the sliding window.
        t: Minimum number of occurrences inside one window.

    Returns:
        The result of listToString applied to the list of qualifying k-mers
        (order is not defined because they are gathered in a set).
    """
    patterns = set()
    # The last window starts at len(Text) - L, so the range must be
    # len(Text) - L + 1 long; stopping one short skipped the final window
    # (and every window when len(Text) == L).
    for start in range(len(Text) - L + 1):
        freqMap = frequencyTable(Text[start:start + L], k)
        patterns.update(kmer for kmer in freqMap if freqMap.get(kmer) >= t)
    return listToString(list(patterns))
Exemplo n.º 3
0
    while True:
        profile = constructProfile(motifs, k, t)
        motifs = Motifs(Dna, k, profile)
        if score(motifs) < score(bestMotif):
            bestMotif = motifs
        else:
            return bestMotif


def Motifs(Dna, k, profile):
    """Pick one k-mer per sequence using profileMostProbable.

    Args:
        Dna: Sequences joined by newline characters in a single string.
        k: Length of the k-mers.
        profile: Profile handed unchanged to profileMostProbable.

    Returns:
        A list holding, for each newline-separated sequence in order, the
        value returned by profileMostProbable for that sequence.
    """
    return [
        profileMostProbable(sequence, k, profile)
        for sequence in Dna.split("\n")
    ]


def iteration_RMS(Dna, k, t, iter):
    """Run RandomizedMotifSearch repeatedly and keep the lowest-scoring result.

    Args:
        Dna: Passed unchanged to RandomizedMotifSearch.
        k: Passed unchanged to RandomizedMotifSearch.
        t: Passed unchanged to RandomizedMotifSearch.
        iter: Total number of searches to run; the first one seeds the best
            result and the remaining iter - 1 try to beat it.

    Returns:
        The motifs with the smallest score(); on ties the earlier result
        is kept.
    """
    bestMotifs = RandomizedMotifSearch(Dna, k, t)
    remaining_runs = iter - 1
    for _ in range(remaining_runs):
        candidate = RandomizedMotifSearch(Dna, k, t)
        # Strict comparison: an equally good candidate does not replace the
        # current best.
        bestMotifs = candidate if score(candidate) < score(bestMotifs) else bestMotifs
    return bestMotifs


if __name__ == "__main__":
    # Script entry point: call iteration_RMS on the inline Dna string
    # (sequences separated by "\n" escapes) with k=15, t=20 and iter=1000.
    # NOTE(review): the string literal below is physically split over three
    # source lines; a plain double-quoted string cannot contain raw line
    # breaks, so this looks like a wrapping artifact -- confirm against the
    # original file before running.
    bestMotifs = iteration_RMS(
        "AGTCAACCCGGATCGCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATAAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATATCGCTCCGGACCTCCAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nTGCGGGACTTGTAGATGGAATGCAGTTCTTGAGAGGTCCGTGCCTGGTTTTATCATTCCCGGGGAGCCCGATTGGGTGCCTGAGGCTTTCGGACTACAGGTTCCCCATAAACCACGAACCAATAGTAGCAGAAGCCAATAGCATCGAACTGGGCCCCCTTGCGAACGGACCTCCGCACGGGTGAGTATACTTCGGATACCC\nCGATATTGACACTGGTGACTCCCGCACCCTCATCGAGCGACCCCTGGGTTGCAGACATTAAGTTCTTGTTACACGTCATATTCATTTGGCTAAGGCTGCGGTAGTAAGCTACCCGGGACGATCTCAAAATCGAAAGTAACCTCCTTAGGGAAAACCTACTCACTCACCCACGGCCGTTCATCACTTTCGGACCATTCAGGT\nACTGTAGGCTTTTCAGGTTAAGACCACTACCCTTGGAAGCGCCAGGTGGGACCACCAATGAGATGATACATATGGGGCAGTTCTCTGGATGATTACACGCCACTCAGTCTAGCTTGTTTGCGCCAGTGAAAATTTGTCAACAAAGACGGCGATTGTAGGTCGAAACGGTAGTCCACTCGTGTTTCCATAAATGCCCTCGAA\nCATAGTTAGATCCACGTGCTGTTCCTAATAGAGACTTTACGCAATGCCGTATGGCTCTCTACGATGGGCTCAGGGGCATAGTTAAGTTACAAGTCAATTGAGATAGACTGGCGCAGACTTTGTGTCGAAACCTTCCTCCACTACGATGTTACCAACAGGGTTAGTGCAAGCATCCATCCATTATTTCACTCTGGCGGACTA\nAATGGCATGGTCCGCAGCCAAATATCGAGACACGAGTTGATACACCGTTCGATTAGCAACTCGAAAGTTACCTCCCTAGAGCGGCGTCGTCGGTTGATGTCATCTTTTCTCTGATCGCATTAATCTCGTGTAAAAAAATTAATGCATACTGAGGCACAGCTTCTTACAATCATGTGAGGTACTTCCGCCTGAATTCTGATG\nGCCTGTTGATAGGTCGGCTTTGTGTTGCCACTGCACAAGGAGGTAGCGTAGAGTGGCGTCACTTTCCAACTCGCGCTTTCAATGACCCCAACGTGCTGCAGATATGTGTAGAGTGAAATTACATAACGCCTTGCTATCCGTCCGTATAATTCCCAGTGGTTCGAAACGCTGCTCCAGGGATTAGGTGGCTAGCGAGGGCTT\nAAAGCTCTAAGATGTTCGCATGCGCAACGTGTAATTCAGGTACGCACGTCGAAACGGACCAAGTGACGATGGTTCACAACCGTTTGCCTAAGCGCGAGCAGTAACGTAAGCAAGTAGCGTGAACCAAGAGCATACCCTGTCTCTGGTTGAGACTATCGGCTTTGATAGAGCGTCTACAGTGCCACTCGTACATGTTCTCGA\nCAATAGCACTCGTTTTCCGATCGACCAGGACCTCCCCTTACCAGGCGGACTGTAATGAGCTGAACGGTTAAAAGGTCCAGTAGTAGCGTCGTGTCAATACCGGTCTTCTGACATCGGGGGAGGCCTGCGGAACAAAAAAAACAATATTATTTTGAATCGGGATC
AGGGCACACACGAAATCATAGACACTACGTCTCGGCT\nGGTAGCATGCAAGACGGGGTCACCGTATGCAAGACAACTCACGTTAATAAAGGTCGAGCCAGCGGTTATAGGACACGAATTGAAGGTTTGTATAGACGGGCAAGCCTCATCCGTTGCCCATTGAGTCCCCTCGGGTCGACGAGGACCTCCTACACTGGATTTTTACCCAAGCCAAGACGAAAAACCTAGGCAAGTAACATT\nAGTTTTTGTATGCCTTTATTTGGGGGCGCAAAGGTTTGGATTTTAACTCAGGCGAGCTACGCCTACCGAAATTAAGCACCTGGTTAACGACGGGAAGAATATCTCACCTACAACTGACAAAGACCAGGCGCCACGTTCAGGTGACACGATTGGTAGGCGGTTCCTACCTTCCGATCGAAACGGACACACGATGTCTTGACA\nCCAGGCTGCTCCCAGTTATCGAGGCATCCTTGTGACTAGCGCCTCGAACGAGACCTCCTACCTATTGCTATATAAATCCAGCGTCGGATCTCACCCGAGACTCATCCCGTGCCTTCCCAGCCCTTGTTGGTCTAGTGCAGCTTAATTCTGTCGATAAAAGGGATGCTAGGTAAGTCATTCACGAATTTTCGTGTGTAGTAT\nACCTTGTAATATCTAATCGGCAGCTTCTCCAATACCATCGTCTCGGACCTCCAGTCGCTCGCTGCGTACGTACTCGTGTTCACACAGGGCGAACGCTGGCGGGCCGGGACGCTTCAGGGAGAGCGTTAGGCCATCAGCGTGACTCGGACGGTGACTATTGGATCTCACCCCCACTCAGGCGTCTCGGCACCCTCATAACCC\nCACTATCCTACCGTATCGGGCTCGAATATGCGCCATCCGAAGGGAGCGATTCAATAGGGACGGCCGGGTCGGCGCGCATATCTTGACTGCACGAAAATGAAACGGACCTCATATTCATGATCCACTGACTAGTAAATAGCTGCGAGGGCAGTTGTGGCAGGTCTTACAGGGTAAACGTTAACGAACTCCACTTTTCGTGGG\nTCACGCCCCTCGACGAAGTCGAAACGGAGACCCGTCCCACGCACATAGGAAGCCAGCGTCATCTGGCTTATGGGGCCGTAGGCTGGATGGTAGGCTCGACGTAGAGTGTCTCAATACAAGCTATTGGGCGCAGATTGATAAACCGCCCGCTTTTCAGACGTTAGTTACCTTCTGTGTGAGGATTGCAACATTACTTGCTAA\nACCCATCGCAGACCGAACTGGGGTTCTAGTACACGACGCCCACGCGATTTGTAGCGCACTTATTGGACTTGAGCCACTCAGCTCCCAGTCCATTCATATTTGTTACAATACGTCCCGTACCAGTGGAACCCCGTTTCCCCGACGGGCGTGCCCGGAGCATCTCGTAGAGGAAAAACGGACCTCCTGAAGGCGATTCGCCTG\nCTATATCCCTGGGTGCTTCACTCAGTCGAATGTGACCTCCAAGTAAAGAACCGGGATTACTTACCAACCTTAGCCGATAGCGATACTATAACGCTCCACCATCCTGACTGGCAACGCACCATTCCTCTTGCAACATGACGACCTGCCTTATTGGCCGATATTTAAGTCAAGAGACTCTGGTAAGTGCAGACGGTGTAAAAC\nGCTACGCCCGGGTGAGGAACGGGGTCCTCCACAGTTAACTATATTGCCAAATTCTTGGAGAAAGTCATAAACTTAAAATATCCATTGCAATACGAAGACCGTGGGGATGAACGCCGCGGGCTGTCTCTGTGTGGTGCTAAAACTCCCGGTGTAACCGATGCATGTTCTTTATCAGCACGGACCTCCCCATGATGCGCTCGC\nGGACATGATGGACTCAAGAGACTTCAGACGCCCTGGCCCCCGACTGCTTTTCTCGCCCGTGCGTAGCCCGCCGAAACGGACCTAAATAACCCCTTGACATACCAATGTACGATCTCGTCAATGTCCTTTTTACC
CAACCCGATCAACAAGTCAGGGTCGAGCTACCGCCTGAAGTTGGCCACACACCGCAACCCAGACCGG",
        15, 20, 1000)
    # Swap the spaces in listToString's output for newlines before printing
    # (presumably one motif per line -- verify listToString's separator).
    print(listToString(bestMotifs).replace(" ", "\n"))
    # Print the score of the motifs that iteration_RMS returned.
    print(score(bestMotifs))
Exemplo n.º 4
0
    # Output: All integer(s) i minimizing Skewi (Genome) among all values of i (from 0 to |Genome|)
    minVal = 999  # Is it best to start from 0?
    skewness = 0
    positions = list()
    skewValues = {
        "G": 1,
        "C": -1,
        "A": 0,
        "T": 0,
        "g": 1,
        "c": -1,
        "a": 0,
        "t": 0
    }

    for i in range(0, len(Genome)):
        skewness = skewness + skewValues.get(Genome[i])
        if (minVal > skewness):
            positions = list()
            minVal = skewness
        if (minVal == skewness):
            positions.append(i + 1)

    return (skewness, positions)


if __name__ == "__main__":
    # Read the dataset and strip line breaks so the genome is one string.
    with open('dataset_369238_6.txt', 'r') as handle:
        genome = handle.read().replace("\n", "")
    # Element 1 of minimumSkew's result is what gets printed.
    skew_result = minimumSkew(genome)
    print(listToString(skew_result[1]))
Exemplo n.º 5
0

def profile_random_probability_kmer(string, k, profile):
    """Randomly pick a k-mer of `string`, weighted by its profile probability.

    Each start position contributes its own weight, so a k-mer that occurs
    at several positions is proportionally more likely to be drawn.
    (Keying the weights by k-mer text collapsed such repeats into a single
    entry and under-sampled them.)

    Args:
        string: Sequence over the uppercase alphabet A, C, G, T.
        k: Length of the k-mers to consider.
        profile: Indexable as profile[row][column], with rows ordered
            A, C, G, T and one column per k-mer position.

    Returns:
        One k-mer (a substring of `string` of length k).

    Raises:
        KeyError: If `string` contains a character other than A, C, G, T.
        IndexError: If `string` is shorter than k (there is nothing to
            choose from).
    """
    row_of = {"A": 0, "C": 1, "G": 2, "T": 3}
    kmers = [string[start:start + k] for start in range(len(string) - k + 1)]

    weights = []
    for kmer in kmers:
        probability = 1.0
        for column, nucleotide in enumerate(kmer):
            probability *= profile[row_of[nucleotide]][column]
        weights.append(probability)

    # random.choices rejects an all-zero weight vector on recent Python
    # versions; when the profile rules out every k-mer, fall back to a
    # uniform draw instead of failing.
    if not any(weights):
        return random.choice(kmers)

    # random.choices returns a one-element list; take the element directly.
    return random.choices(kmers, weights=weights)[0]


def iteration_GS(Dna, k, t, N, iter):
    """Run GibbsSampler repeatedly and keep the lowest-scoring result.

    Args:
        Dna: Passed unchanged to GibbsSampler.
        k: Passed unchanged to GibbsSampler.
        t: Passed unchanged to GibbsSampler.
        N: Passed unchanged to GibbsSampler.
        iter: Total number of sampler runs; the first one seeds the best
            result and the remaining iter - 1 try to beat it.

    Returns:
        The motifs with the smallest score(); on ties the earlier result
        is kept.
    """
    bestMotifs = GibbsSampler(Dna, k, t, N)
    remaining_runs = iter - 1
    for _ in range(remaining_runs):
        candidate = GibbsSampler(Dna, k, t, N)
        # Strict comparison: an equally good candidate does not replace the
        # current best.
        bestMotifs = candidate if score(candidate) < score(bestMotifs) else bestMotifs
    return bestMotifs


if __name__ == "__main__":
    # Script entry point: call iteration_GS on the inline Dna string
    # (sequences separated by "\n" escapes) with k=15, t=10, N=2000 and
    # iter=20, then print listToString's output with spaces replaced by
    # newlines (presumably one motif per line -- verify listToString's
    # separator).
    # NOTE(review): the string literal below is physically split over two
    # source lines; a plain double-quoted string cannot contain raw line
    # breaks, so this looks like a wrapping artifact -- confirm against the
    # original file before running.
    print(
        listToString(
            iteration_GS(
                "GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC\nCCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG\nACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC\nGGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC\nGTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG\nCCGCTGGCGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA\nGGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA\nGTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGC
CACAATCGGGCCACAATCGAAAGCCGAGCAG\nGGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG\nTCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC",
                15, 10, 2000, 20)).replace(" ", "\n"))
Exemplo n.º 6
0
#  1.8 Some Hidden Messages are More Elusive than Others 
from Ex7 import hammingDistance
from Ex4 import listToString
from Ex2 import MaxMap
from Ex10 import Neighbors
def FrequentWordsWithMismatches(Text, k, d):
    """Find the most frequent k-mers in Text, allowing up to d mismatches.

    Every k-mer of Text votes for each member of its d-neighborhood (as
    produced by Neighbors); the k-mers with the highest vote count, as
    reported by MaxMap, are returned.

    Args:
        Text: The string to analyse.
        k: Length of the k-mers (the exercise assumes k <= 12).
        d: Maximum number of mismatches (the exercise assumes d <= 3).

    Returns:
        A list of the k-mers whose count equals the maximum; empty when
        Text holds no k-mer at all.
    """
    freqMap = {}
    # The last k-mer starts at len(Text) - k, hence the "+ 1"; stopping one
    # short silently ignored the final k-mer of Text.
    for i in range(len(Text) - k + 1):
        # Iterate over the whole neighborhood; indexing up to len - 1
        # dropped one neighbor of every k-mer.
        for neighbor in Neighbors(Text[i:i + k], d):
            freqMap[neighbor] = freqMap.get(neighbor, 0) + 1

    if not freqMap:
        # Nothing was counted (e.g. Text shorter than k): no maximum exists.
        return []

    m = MaxMap(freqMap)
    return [kmer for kmer in freqMap if freqMap[kmer] == m]

if __name__ == "__main__":
    # Sample run: k = 6 with up to 3 mismatches on a fixed input string.
    sample_text = "GGTAATATCTTAAATCTTGGTAGGTAATAAATTAAAGGTAATAATAAATTGGTAAATTAAAATAGGTAAAAATAGGTAATAAAAATAAATTAAAAATTATAATAAAAATATCTTTCTTATAATAAAAAAATCTTGGTATCTTTCTTAAAAATTAAAATAAATTAATTTCTTAAATCTTATAGGTAGGTAGGTAAATTAATTAAAAAAGGTAATATCTTAAAAAAAAAAAATCTTTCTTTCTTGGTATCTTTCTTAATTGGTATCTTATAGGTAATATCTTTCTTGGTAATAAAAAATTTCTTTCTTGGTAATAGGTAAATTAAAAATTAATTATAAATTAAATCTTGGTAGGTAATATCTTTCTTAAATCTTAATT"
    most_frequent = FrequentWordsWithMismatches(sample_text, 6, 3)
    print(listToString(most_frequent))