def gendnaword1to6(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C6 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 6 as keys

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C6
       constraints. An empty list is returned when n <= 0.

    Example: to generate a set of 25 DNA words satisfying C1(8), C2(4), C3(5),
    C4(7), C5(8), C6(10), call the function
        gendnaword1to6(25, {1 : 8, 2 : 4, 3 : 5, 4 : 7, 5 : 8, 6 : 10})
    """
    if n <= 0:
        return []

    M = genbinword14(n, maptypetoparam[1], maptypetoparam[4])

    # In M, change '0' to 'A' and '1' to 'T'. The return value is ignored,
    # so the helper is presumably mutating M in place.
    helper.change_char_in_mat(M, range(len(M[0])), {'0': 'A', '1': 'T'})

    k = max(maptypetoparam[2], maptypetoparam[3],
            maptypetoparam[5], maptypetoparam[6])
    leadingstr = 'C' * k

    # Prepend k = max(k2, k3, k5, k6) copies of 'C' to each row in M.
    # range() is used instead of the Python-2-only xrange() so the function
    # runs unchanged on both Python 2 and Python 3.
    return [leadingstr + ''.join(M[row]) for row in range(n)]
def gendnaword1to6(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C6 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 6 as keys

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C6
       constraints. An empty list is returned when n <= 0.

    Example: to generate a set of 25 DNA words satisfying C1(8), C2(4), C3(5),
    C4(7), C5(8), C6(10), call the function
        gendnaword1to6(25, {1 : 8, 2 : 4, 3 : 5, 4 : 7, 5 : 8, 6 : 10})
    """
    if n <= 0:
        return []

    M = genbinword14(n, maptypetoparam[1], maptypetoparam[4])

    # In M, change '0' to 'A' and '1' to 'T'. The return value is ignored,
    # so the helper is presumably mutating M in place.
    helper.change_char_in_mat(M, range(len(M[0])), {'0': 'A', '1': 'T'})

    k = max(maptypetoparam[2], maptypetoparam[3],
            maptypetoparam[5], maptypetoparam[6])
    leadingstr = 'C' * k

    # Prepend k = max(k2, k3, k5, k6) copies of 'C' to each row in M.
    # range() is used instead of the Python-2-only xrange() so the function
    # runs unchanged on both Python 2 and Python 3.
    return [leadingstr + ''.join(M[row]) for row in range(n)]
def gendnaword12378(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1, C2, C3, C7 and C8
    constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, 3, 7 and 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1, C2, C3, C7 and C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Example: to generate a set of 50 DNA words satisfying C1(8), C2(4), C3(5),
    C7(0.7), C8(3), call the function
        gendnaword12378(50, {1 : 8, 2 : 4, 3 : 5, 7 : 0.7, 8 : 3})
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    M = genbinword14(n, maptypetoparam[1], 1)
    l0 = len(M[0])
    if l0 & 1:
        # If l0 is odd, append '0' at the end of every word in M so that
        # the new length is even
        for row in range(n):
            M[row].append('0')

    # Surround every binary word with k copies of '1' on each side, then
    # break long runs. Rows are processed in index order, one breakrun call
    # per row, exactly as before.
    k = max(maptypetoparam[2], maptypetoparam[3])
    strlist = []
    for row in range(n):
        newlist = ['1'] * k
        newlist.extend(M[row])
        newlist.extend(['1'] * k)
        strlist.append(
            algo_subroutine.breakrun(''.join(newlist), maptypetoparam[8]))

    # NOTE(review): only the first word's length is used below, so all words
    # are assumed to have the same length after breakrun -- confirm.
    newlen = len(strlist[0])
    allcolumn = range(newlen)
    if int(maptypetoparam[7]) == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))

    for strid in range(n):
        curlist = list(strlist[strid])
        # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
        # still present (i.e. in the columns not chosen) becomes A/T.
        helper.change_char_in_mat([curlist], chosencolumn, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat([curlist], allcolumn, {'0': 'A', '1': 'T'})
        strlist[strid] = ''.join(curlist)

    return strlist
def gendnaword1to7(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C7 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 7 as keys.

    Outputs:
     + strlist: a Python list of DNA words satisfying C1 through C7
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Example: to generate a set of 25 DNA words satisfying C1(8), C2(4), C3(5),
    C4(7), C5(8), C6(10), C7(0.7), call the function
        gendnaword1to7(25, {1 : 8, 2 : 4, 3 : 5, 4 : 7, 5 : 8, 6 : 10, 7 : 0.7})
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    M = genbinword14(n, maptypetoparam[1], maptypetoparam[4])

    k = max(maptypetoparam[2], maptypetoparam[3],
            maptypetoparam[5], maptypetoparam[6])
    # Final word length: the binary word plus k padding characters per side
    newlen = len(M[0]) + k + k

    # The same set of columns is used for every word
    if int(maptypetoparam[7]) == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))
    allcolumn = range(newlen)

    strlist = []
    for row in range(n):
        # Add k instances of '1' at the beginning and the end of each row in M
        newlist = ['1'] * k
        newlist.extend(M[row])
        newlist.extend(['1'] * k)

        # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
        # still present (i.e. in the columns not chosen) becomes A/T.
        helper.change_char_in_mat([newlist], chosencolumn, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat([newlist], allcolumn, {'0': 'A', '1': 'T'})
        strlist.append(''.join(newlist))

    return strlist
def gendnaword1to7(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C7 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 7 as keys.

    Outputs:
     + strlist: a Python list of DNA words satisfying C1 through C7
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Example: to generate a set of 25 DNA words satisfying C1(8), C2(4), C3(5),
    C4(7), C5(8), C6(10), C7(0.7), call the function
        gendnaword1to7(25, {1 : 8, 2 : 4, 3 : 5, 4 : 7, 5 : 8, 6 : 10, 7 : 0.7})
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    M = genbinword14(n, maptypetoparam[1], maptypetoparam[4])

    k = max(maptypetoparam[2], maptypetoparam[3],
            maptypetoparam[5], maptypetoparam[6])
    # Final word length: the binary word plus k padding characters per side
    newlen = len(M[0]) + k + k

    # The same set of columns is used for every word
    if int(maptypetoparam[7]) == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))
    allcolumn = range(newlen)

    strlist = []
    for row in range(n):
        # Add k instances of '1' at the beginning and the end of each row in M
        newlist = ['1'] * k
        newlist.extend(M[row])
        newlist.extend(['1'] * k)

        # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
        # still present (i.e. in the columns not chosen) becomes A/T.
        helper.change_char_in_mat([newlist], chosencolumn, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat([newlist], allcolumn, {'0': 'A', '1': 'T'})
        strlist.append(''.join(newlist))

    return strlist
def gendnaword14(n, maptypetoparam):
    """
    Build a list of DNA words that satisfy the C1 and C4 constraints.

    Inputs:
     + n: how many words to produce
     + maptypetoparam: dictionary from constraint type (an integer) to the
       parameter of that constraint. Keys 1 and 4 are required.

    Output:
     + a Python list of DNA words satisfying C1 and C4; the list is empty
       when n <= 0

    Example: for 15 DNA words satisfying C1(8) and C4(9), call
        gendnaword14(15, {1 : 8, 4 : 9 })
    """
    if n <= 0:
        return []

    c1_param = maptypetoparam[1]
    c4_param = maptypetoparam[4]
    binmat = genbinword14(n, c1_param, c4_param)

    # Every column is rewritten: '0' becomes 'C' and '1' becomes 'G'
    every_column = range(len(binmat[0]))
    binary_to_dna = {'0': 'C', '1': 'G'}
    helper.change_char_in_mat(binmat, every_column, binary_to_dna)

    return helper.convert_mat_to_strlist(binmat, n)
def gendnaword1to8algo1(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C8 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Exception:
     + A RuntimeError will be raised if the following condition is NOT
       satisfied:
           1 / (d + 1) <= gamma <= d / (d + 1)
       where gamma = maptypetoparam[7] and d = maptypetoparam[8]
    """
    # Key 7 is read before n is checked, so it must be present even if n <= 0
    gamma = maptypetoparam[7]
    if n <= 0 or gamma > 1:
        return []

    d = maptypetoparam[8]
    if (1.0 / (d + 1) > gamma) or (gamma > d * 1.0 / (d + 1)):
        raise RuntimeError("gendnaword1to8algo1 works only if 1 / (d + 1) <= gamma <= d / (d + 1)")

    # Generate the set of strings satisfies C1 constraint
    M = genbinword14(n, max(maptypetoparam[1], maptypetoparam[4]), 1)

    # k is the largest of the C2..C6 parameters; range() replaces the
    # Python-2-only xrange() so this also runs on Python 3.
    k = max(maptypetoparam[i] for i in range(2, 6 + 1))

    # Surround every binary word with k copies of '1' on each side
    newM = []
    for row in range(n):
        newrow = ['1'] * k
        newrow.extend(M[row])
        newrow.extend(['1'] * k)
        newM.append(newrow)

    # Find oddlist and evenlist as mentioned in Step 3 of the algorithm
    # description (the original comment refers to comments earlier in the
    # file). Work with the larger of gamma and 1 - gamma so that the "odd"
    # group is the bigger one.
    newlen = len(newM[0])
    newgamma = gamma
    if newgamma < 0.5:
        newgamma = 1 - newgamma
    numchoose = int(newgamma * newlen)
    numnotchoose = newlen - numchoose
    minoddsize = int(numchoose * 1.0 / numnotchoose)
    numleft = numchoose % numnotchoose

    # NOTE(review): if numchoose < numnotchoose (e.g. gamma == 0.5 with an odd
    # newlen) then minoddsize is 0, the test `oddsize == minoddsize` below
    # never succeeds, and every column ends up in oddlist -- confirm whether
    # this case can occur and what is intended.
    oddlist = []
    evenlist = []
    ind = 0
    oddsize = 0
    while ind < newlen:
        oddlist.append(ind)
        ind += 1
        oddsize += 1
        if ind < newlen and oddsize == minoddsize:
            # The first numleft odd groups receive one extra column
            if numleft != 0:
                oddlist.append(ind)
                numleft -= 1
                ind += 1
            # A single "even" column separates consecutive odd groups
            if ind < newlen:
                evenlist.append(ind)
                ind += 1
            oddsize = 0

    # Convert binary words into DNA words
    if gamma < 0.5:
        helper.change_char_in_mat(newM, oddlist, {'0': 'A', '1': 'T'})
        helper.change_char_in_mat(newM, evenlist, {'0': 'C', '1': 'G'})
    else:
        helper.change_char_in_mat(newM, oddlist, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat(newM, evenlist, {'0': 'A', '1': 'T'})

    return helper.convert_mat_to_strlist(newM, n)
def gendnaword1to8algo2(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C8 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Exception:
     + A RuntimeError will be raised if maptypetoparam[8] < 2
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    maxlenrun = maptypetoparam[8]
    if maxlenrun < 2:
        raise RuntimeError("gendnaword1to8algo2 only works with maxlenrun >= 2")

    # Generate the set of strings satisfies C1 constraint
    M = genbinword14(n, max(maptypetoparam[1], maptypetoparam[4]), 1)

    # k is the largest of the C2..C6 parameters; range() replaces the
    # Python-2-only xrange() so this also runs on Python 3.
    k = max(maptypetoparam[i] for i in range(2, 6 + 1))
    l0 = len(M[0])

    # Prepare the 'string' (list of characters) used later:
    # (maxlenrun - 1) copies of '1' followed by one '0', repeated
    # ceil(k / maxlenrun) times.
    baselist = ['1'] * (maxlenrun - 1)
    baselist.append('0')
    numtime = int(math.ceil(k * 1.0 / maxlenrun))
    supplist = baselist * numtime

    newM = []
    for row in range(n):
        # Layout of each new row:
        #   supplist + '1' + (word with inserted letters) + '0' + supplist
        newrow = list(supplist)
        newrow.append('1')
        sublen = 0
        for ind in range(l0):
            newrow.append(M[row][ind])
            sublen += 1
            # After every (maxlenrun - 1) copied characters, and after the
            # last character, insert the letter returned by
            # helper.get_complement_letter for the character just copied.
            if (sublen == maxlenrun - 1) or (ind == l0 - 1):
                newrow.append(helper.get_complement_letter(M[row][ind]))
                sublen = 0
        newrow.append('0')
        newrow.extend(supplist)
        newM.append(newrow)

    newlen = len(newM[0])
    allcolumn = range(newlen)
    if maptypetoparam[7] == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))

    # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
    # still present (i.e. in the columns not chosen) becomes A/T.
    helper.change_char_in_mat(newM, chosencolumn, {'0': 'C', '1': 'G'})
    helper.change_char_in_mat(newM, allcolumn, {'0': 'A', '1': 'T'})

    return helper.convert_mat_to_strlist(newM, n)
def gendnaword12378(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1, C2, C3, C7 and C8
    constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, 3, 7 and 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1, C2, C3, C7 and C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Example: to generate a set of 50 DNA words satisfying C1(8), C2(4), C3(5),
    C7(0.7), C8(3), call the function
        gendnaword12378(50, {1 : 8, 2 : 4, 3 : 5, 7 : 0.7, 8 : 3})
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    M = genbinword14(n, maptypetoparam[1], 1)
    l0 = len(M[0])
    if l0 & 1:
        # If l0 is odd, append '0' at the end of every word in M so that
        # the new length is even
        for row in range(n):
            M[row].append('0')

    # Surround every binary word with k copies of '1' on each side, then
    # break long runs. Rows are processed in index order, one breakrun call
    # per row, exactly as before.
    k = max(maptypetoparam[2], maptypetoparam[3])
    strlist = []
    for row in range(n):
        newlist = ['1'] * k
        newlist.extend(M[row])
        newlist.extend(['1'] * k)
        strlist.append(
            algo_subroutine.breakrun(''.join(newlist), maptypetoparam[8]))

    # NOTE(review): only the first word's length is used below, so all words
    # are assumed to have the same length after breakrun -- confirm.
    newlen = len(strlist[0])
    allcolumn = range(newlen)
    if int(maptypetoparam[7]) == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))

    for strid in range(n):
        curlist = list(strlist[strid])
        # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
        # still present (i.e. in the columns not chosen) becomes A/T.
        helper.change_char_in_mat([curlist], chosencolumn, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat([curlist], allcolumn, {'0': 'A', '1': 'T'})
        strlist[strid] = ''.join(curlist)

    return strlist
def gendnaword1to8algo1(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C8 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Exception:
     + A RuntimeError will be raised if the following condition is NOT
       satisfied:
           1 / (d + 1) <= gamma <= d / (d + 1)
       where gamma = maptypetoparam[7] and d = maptypetoparam[8]
    """
    # Key 7 is read before n is checked, so it must be present even if n <= 0
    gamma = maptypetoparam[7]
    if n <= 0 or gamma > 1:
        return []

    d = maptypetoparam[8]
    if (1.0 / (d + 1) > gamma) or (gamma > d * 1.0 / (d + 1)):
        raise RuntimeError("gendnaword1to8algo1 works only if 1 / (d + 1) <= gamma <= d / (d + 1)")

    # Generate the set of strings satisfies C1 constraint
    M = genbinword14(n, max(maptypetoparam[1], maptypetoparam[4]), 1)

    # k is the largest of the C2..C6 parameters; range() replaces the
    # Python-2-only xrange() so this also runs on Python 3.
    k = max(maptypetoparam[i] for i in range(2, 6 + 1))

    # Surround every binary word with k copies of '1' on each side
    newM = []
    for row in range(n):
        newrow = ['1'] * k
        newrow.extend(M[row])
        newrow.extend(['1'] * k)
        newM.append(newrow)

    # Find oddlist and evenlist as mentioned in Step 3 of the algorithm
    # description (the original comment refers to comments earlier in the
    # file). Work with the larger of gamma and 1 - gamma so that the "odd"
    # group is the bigger one.
    newlen = len(newM[0])
    newgamma = gamma
    if newgamma < 0.5:
        newgamma = 1 - newgamma
    numchoose = int(newgamma * newlen)
    numnotchoose = newlen - numchoose
    minoddsize = int(numchoose * 1.0 / numnotchoose)
    numleft = numchoose % numnotchoose

    # NOTE(review): if numchoose < numnotchoose (e.g. gamma == 0.5 with an odd
    # newlen) then minoddsize is 0, the test `oddsize == minoddsize` below
    # never succeeds, and every column ends up in oddlist -- confirm whether
    # this case can occur and what is intended.
    oddlist = []
    evenlist = []
    ind = 0
    oddsize = 0
    while ind < newlen:
        oddlist.append(ind)
        ind += 1
        oddsize += 1
        if ind < newlen and oddsize == minoddsize:
            # The first numleft odd groups receive one extra column
            if numleft != 0:
                oddlist.append(ind)
                numleft -= 1
                ind += 1
            # A single "even" column separates consecutive odd groups
            if ind < newlen:
                evenlist.append(ind)
                ind += 1
            oddsize = 0

    # Convert binary words into DNA words
    if gamma < 0.5:
        helper.change_char_in_mat(newM, oddlist, {'0': 'A', '1': 'T'})
        helper.change_char_in_mat(newM, evenlist, {'0': 'C', '1': 'G'})
    else:
        helper.change_char_in_mat(newM, oddlist, {'0': 'C', '1': 'G'})
        helper.change_char_in_mat(newM, evenlist, {'0': 'A', '1': 'T'})

    return helper.convert_mat_to_strlist(newM, n)
def gendnaword1to8algo2(n, maptypetoparam):
    """
    Generate and return a set of DNA words satisfying C1 through C8 constraints.

    Inputs:
     + n: the number of strings to generate
     + maptypetoparam: a dictionary that maps an integer (representing the
       constraint type) to the parameter corresponding to that constraint
       type. It must have 1, 2, ..., 8 as keys.

    Output:
     + strlist: a Python list of DNA words satisfying C1 through C8
       constraints. An empty list is returned when n <= 0 or when
       maptypetoparam[7] > 1.

    Exception:
     + A RuntimeError will be raised if maptypetoparam[8] < 2
    """
    if n <= 0:
        return []
    if maptypetoparam[7] > 1:
        return []

    maxlenrun = maptypetoparam[8]
    if maxlenrun < 2:
        raise RuntimeError("gendnaword1to8algo2 only works with maxlenrun >= 2")

    # Generate the set of strings satisfies C1 constraint
    M = genbinword14(n, max(maptypetoparam[1], maptypetoparam[4]), 1)

    # k is the largest of the C2..C6 parameters; range() replaces the
    # Python-2-only xrange() so this also runs on Python 3.
    k = max(maptypetoparam[i] for i in range(2, 6 + 1))
    l0 = len(M[0])

    # Prepare the 'string' (list of characters) used later:
    # (maxlenrun - 1) copies of '1' followed by one '0', repeated
    # ceil(k / maxlenrun) times.
    baselist = ['1'] * (maxlenrun - 1)
    baselist.append('0')
    numtime = int(math.ceil(k * 1.0 / maxlenrun))
    supplist = baselist * numtime

    newM = []
    for row in range(n):
        # Layout of each new row:
        #   supplist + '1' + (word with inserted letters) + '0' + supplist
        newrow = list(supplist)
        newrow.append('1')
        sublen = 0
        for ind in range(l0):
            newrow.append(M[row][ind])
            sublen += 1
            # After every (maxlenrun - 1) copied characters, and after the
            # last character, insert the letter returned by
            # helper.get_complement_letter for the character just copied.
            if (sublen == maxlenrun - 1) or (ind == l0 - 1):
                newrow.append(helper.get_complement_letter(M[row][ind]))
                sublen = 0
        newrow.append('0')
        newrow.extend(supplist)
        newM.append(newrow)

    newlen = len(newM[0])
    allcolumn = range(newlen)
    if maptypetoparam[7] == 1:
        chosencolumn = range(newlen)
    else:
        chosencolumn = helper.choose_random_pos_list(
            newlen, int(math.ceil(maptypetoparam[7] * newlen)))

    # First pass: the chosen columns become C/G. Second pass: any '0'/'1'
    # still present (i.e. in the columns not chosen) becomes A/T.
    helper.change_char_in_mat(newM, chosencolumn, {'0': 'C', '1': 'G'})
    helper.change_char_in_mat(newM, allcolumn, {'0': 'A', '1': 'T'})

    return helper.convert_mat_to_strlist(newM, n)