示例#1
0
def medianString(input, k):
    """Print the k-mer with the smallest total distance to a set of strings.

    Every k-mer over the alphabet A/C/G/T is scored by summing, over each
    whitespace-separated string in ``input``, the smallest value that
    ``hammingDistance`` reports between the k-mer and any length-``k``
    window of that string.  The lowest score is printed first, followed by
    the k-mer that achieved it; on ties the k-mer generated first wins.

    The body avoids Python 2-only constructs (``print`` statement,
    ``sys.maxint``, ``/`` used as integer division) so it runs unchanged on
    Python 3 as well.

    Args:
        input: Whitespace-separated DNA strings.
        k: Non-negative integer k-mer length.

    Returns:
        None.  The result is written to standard output.
    """
    from itertools import product

    dnas = input.split()
    # Reversing each tuple keeps the historical enumeration order, in which
    # the first character varies fastest; that order decides ties.
    kmers = [''.join(reversed(letters))
             for letters in product('ACGT', repeat=k)]

    unreachable = float('inf')
    distance = unreachable
    median = ''
    for kmer in kmers:
        distanceSum = 0
        for dna in dnas:
            # Stays infinite when dna is shorter than k (no window exists).
            closest = unreachable
            for start in range(len(dna) - k + 1):
                # Evaluate the distance once per window instead of twice.
                current = hammingDistance(kmer, dna[start:start + k])
                if current < closest:
                    closest = current
            distanceSum += closest
        if distanceSum < distance:
            distance = distanceSum
            median = kmer
    print(distance)
    print(median)
    return
def frequentWordsWithMismatches(text, k, d):
    """Print the most frequent k-mers of ``text``, allowing ``d`` mismatches.

    For every k-mer over A/C/G/T the function counts the length-``k``
    windows of ``text`` for which ``hammingDistance(kmer, window) <= d``.
    All k-mers that reach the highest count are printed on one line,
    separated by single spaces, in dictionary iteration order.

    The body avoids Python 2-only constructs (``print x,`` statement, ``/``
    used as integer division) and no longer shadows the builtin ``max``.

    Args:
        text: The string to scan.
        k: Non-negative integer k-mer length.
        d: Largest distance still counted as a match.

    Returns:
        None.  The result is written to standard output.
    """
    from itertools import product

    window_total = len(text) - k + 1
    count = {}
    for letters in product('ACGT', repeat=k):
        # Reversed so that the first character varies fastest, which is the
        # order the k-mers were historically generated in.
        kmer = ''.join(reversed(letters))
        count[kmer] = sum(
            1
            for start in range(window_total)
            if hammingDistance(kmer, text[start:start + k]) <= d
        )

    # There is always at least one k-mer (the empty one when k == 0), so the
    # dictionary is never empty here.
    best = max(count.values())
    print(' '.join(kmer for kmer in count if count[kmer] == best))
    return
示例#3
0
def approxPatternMatching(pattern, text, k):
    """Print every position where ``pattern`` approximately occurs in ``text``.

    Spaces are removed from both arguments first.  A start index is reported
    when ``hammingDistance`` between the pattern and the window of the same
    length beginning at that index is at most ``k``.  The indices are printed
    on one line separated by single spaces; nothing is printed when there is
    no match.

    The Python 2-only ``print i,`` statement is replaced by a single
    ``print(...)`` call so the function also runs on Python 3.

    Args:
        pattern: The pattern to look for (spaces are ignored).
        text: The string to scan (spaces are ignored).
        k: Largest distance still counted as a match.

    Returns:
        None.  The result is written to standard output.
    """
    pattern = pattern.replace(' ', '')
    text = text.replace(' ', '')
    width = len(pattern)  # hoisted: invariant across the scan
    positions = [
        str(start)
        for start in range(len(text) - width + 1)
        if hammingDistance(pattern, text[start:start + width]) <= k
    ]
    if positions:
        print(' '.join(positions))
    return
def motifEnumeration(input, k, d):
    """Print every k-mer that appears, with mismatches, in all given strings.

    A k-mer over A/C/G/T is printed (one per line) when each
    whitespace-separated string in ``input`` has at least one length-``k``
    window whose ``hammingDistance`` to the k-mer is at most ``d``.  With no
    strings at all the condition holds vacuously and every k-mer is printed.

    The Python 2-only ``print p`` statement and ``/`` integer division are
    gone, so the function also runs on Python 3.  The generated k-mers are
    already unique, so results are printed in generation order rather than
    in the arbitrary order of an intermediate ``set``.

    Args:
        input: Whitespace-separated DNA strings.
        k: Non-negative integer k-mer length.
        d: Largest distance still counted as a match.

    Returns:
        None.  The result is written to standard output.
    """
    from itertools import product

    dnas = input.split()
    patterns = []
    for letters in product('ACGT', repeat=k):
        # Reversed so that the first character varies fastest, which is the
        # order the k-mers were historically generated in.
        kmer = ''.join(reversed(letters))
        in_every_string = all(
            # any() stops at the first matching window of a string.
            any(
                hammingDistance(kmer, dna[start:start + k]) <= d
                for start in range(len(dna) - k + 1)
            )
            for dna in dnas
        )
        if in_every_string:
            patterns.append(kmer)

    for found in patterns:
        print(found)
    return
示例#5
0
def medianString(k, dna):
    """Return the k-mer with the smallest total distance to the strings in ``dna``.

    For each candidate produced by ``allKmers(k)`` and each string in
    ``dna``, the smallest ``hammingDistance`` between the candidate and any
    length-``k`` window of the string is taken; those minima are summed per
    candidate and the candidate with the lowest sum is returned.  On ties
    the candidate that comes first in ``allKmers(k)`` wins.

    Compared with the earlier implementation:
      * every window ``0 .. len(entry) - k`` is scored (the old loop stopped
        two windows early, and scored truncated windows when a string was
        no longer than ``k``);
      * the best total is found without a fixed ``99999`` ceiling, which
        made the function return ``""`` for large inputs.

    Args:
        k: k-mer length; converted with ``int``.
        dna: Iterable of strings; each one is sliced into windows.

    Returns:
        The best-scoring k-mer, or ``""`` when ``allKmers(k)`` yields nothing.
    """
    # NOTE(review): this message names a different routine; the text is left
    # unchanged so existing console output stays the same.
    print("starting frequentWordsMismatch")

    k = int(k)
    # dict.fromkeys drops repeated k-mers while keeping their first-seen order.
    keys = list(dict.fromkeys(allKmers(k)))
    totals = dict.fromkeys(keys, 0)

    for entry in dna:
        # k is the starting "worst case" for each candidate, as before;
        # assumes hammingDistance never reports more than k for two k-mers.
        nearest = dict.fromkeys(keys, k)
        for start in range(len(entry) - k + 1):
            window = entry[start:start + k]
            for key in keys:
                ham = int(hammingDistance(window, key))
                if ham < nearest[key]:
                    nearest[key] = ham
        for key in keys:
            totals[key] += nearest[key]

    res = ""
    best = None
    for key in keys:
        # Strict comparison keeps the earliest candidate on ties.
        if best is None or totals[key] < best:
            res = key
            best = totals[key]
    return res
示例#6
0
def approximateMatches(Pattern, Text, d):
    """Count the windows of ``Text`` that lie within distance ``d`` of ``Pattern``.

    Args:
        Pattern: The string to compare against; its length sets the window size.
        Text: The string whose windows are examined.
        d: Largest ``hammingDistance`` still counted as a match.

    Returns:
        The number of start positions whose window matches.
    """
    width = len(Pattern)
    last_start = len(Text) - width
    return sum(
        1
        for start in range(last_start + 1)
        if hammingDistance(Pattern, Text[start:start + width]) <= d
    )
示例#7
0
def Neighbors(Pattern, d):
    """Return the strings over A/C/G/T within distance ``d`` of ``Pattern``.

    The neighbourhood is built recursively from the neighbourhood of the
    suffix ``Pattern[1:]``: a suffix neighbour that is strictly closer than
    ``d`` to the suffix can take any first symbol, while one that already
    uses the whole budget must keep ``Pattern[0]``.

    The comparison is ``< d``; the earlier ``<= d`` was true for every
    suffix neighbour, so the result contained strings at distance ``d + 1``
    (and duplicates).  An empty ``Pattern`` now returns immediately instead
    of recursing without end.

    Args:
        Pattern: The centre string.
        d: Maximum allowed ``hammingDistance`` from ``Pattern``.

    Returns:
        A set for the base cases (``d == 0``, empty or one-character
        ``Pattern``) and a list otherwise, as before.
    """
    if d == 0 or not Pattern:
        return {Pattern}
    if len(Pattern) == 1:
        return {"A", "C", "G", "T"}

    suffix = Pattern[1:]
    Neighborhood = list()
    for text in Neighbors(suffix, d):
        if hammingDistance(suffix, text) < d:
            # Budget left over: the first symbol may differ from Pattern[0].
            Neighborhood.extend(x + text for x in "ACGT")
        else:
            # Budget exhausted by the suffix: the first symbol must match.
            Neighborhood.append(Pattern[0] + text)

    return Neighborhood
示例#8
0
def MotifEnumeration(Dna, k, d):
    """Return the k-mers that occur, with mismatches, in every line of ``Dna``.

    Each length-``k`` window of each line is expanded with
    ``Neighbors(window, d)``; a neighbour is kept when every line has at
    least one length-``k`` window whose ``hammingDistance`` to it is at
    most ``d``.

    Blank lines (for example the one produced by a trailing newline) are
    ignored and surrounding whitespace is trimmed; previously a single blank
    line made the result empty because it can never contain a window.  The
    lines are split once, the window scan stops at the first match, and a
    neighbour that was already examined is not examined again.

    Args:
        Dna: Newline-separated DNA strings.
        k: Integer window length.
        d: Largest distance still counted as a match.

    Returns:
        A set of matching k-mers (empty when there are none).
    """
    strings = [line.strip() for line in Dna.split("\n")]
    strings = [line for line in strings if line]

    def occurs_in(candidate, string):
        # True as soon as one window of string is close enough to candidate.
        return any(
            hammingDistance(candidate, string[j:j + k]) <= d
            for j in range(len(string) - k + 1)
        )

    patterns = set()
    examined = set()
    for word in strings:
        for i in range(len(word) - k + 1):
            for neighbor in Neighbors(word[i:i + k], d):
                if neighbor in examined:
                    continue
                examined.add(neighbor)
                if all(occurs_in(neighbor, string) for string in strings):
                    patterns.add(neighbor)

    return patterns