def global_align(seq1, seq2, Parameters=Parameters()):
    """Fill the three affine-gap score matrices for ``seq1`` against ``seq2``.

    Three matrices are obtained from ``create_matrix(len(seq1), len(seq2))``
    (presumably sized ``(len(seq1) + 1) x (len(seq2) + 1)``, since cells up
    to ``[len(seq1)][len(seq2)]`` are written -- confirm against the helper):

    * ``M``  -- scores of alignments whose last column pairs ``seq1[i - 1]``
      with ``seq2[j - 1]``;
    * ``Ix`` -- scores of alignments whose last column consumes
      ``seq1[i - 1]`` only (move from row ``i - 1``);
    * ``Iy`` -- scores of alignments whose last column consumes
      ``seq2[j - 1]`` only (move from column ``j - 1``).

    Border handling: ``Iy`` is 0 along the first row and ``Ix`` is 0 along
    the first column, so a gap run hugging a border is not penalised here.
    ``Ix[0][0]`` and ``Iy[0][0]`` are never assigned and keep whatever
    ``create_matrix`` put there.

    Args:
        seq1: First sequence (indexed along rows).
        seq2: Second sequence (indexed along columns).
        Parameters: Object providing ``score(a, b)``, ``gapopen`` and ``gap``.

    Returns:
        The tuple ``(M, Ix, Iy)``.
    """
    n_rows = len(seq1)
    n_cols = len(seq2)
    M = create_matrix(n_rows, n_cols)
    Ix = create_matrix(n_rows, n_cols)
    Iy = create_matrix(n_rows, n_cols)

    # Border initialisation: origin first, then the first row, then the
    # first column.  Interior cells are filled by the recurrence below.
    M[0][0] = 0
    for col in range(1, n_cols + 1):
        Iy[0][col] = 0
        Ix[0][col] = -INF
        M[0][col] = -INF
    for row in range(1, n_rows + 1):
        Ix[row][0] = 0
        Iy[row][0] = -INF
        M[row][0] = -INF

    # Affine-gap recurrence.
    for row, res1 in enumerate(seq1, start=1):
        for col, res2 in enumerate(seq2, start=1):
            pair_score = Parameters.score(res1, res2)
            from_ix = Ix[row - 1][col - 1] + pair_score
            from_m = M[row - 1][col - 1] + pair_score
            from_iy = Iy[row - 1][col - 1] + pair_score
            M[row][col] = max(from_ix, from_m, from_iy)

            # The additions are kept in this exact left-to-right order: the
            # traceback compares cells with ``==`` against the same
            # expression, so regrouping could break it for float penalties.
            Ix[row][col] = max(
                M[row - 1][col] + Parameters.gapopen + Parameters.gap,
                Ix[row - 1][col] + Parameters.gap,
            )
            Iy[row][col] = max(
                M[row][col - 1] + Parameters.gapopen + Parameters.gap,
                Iy[row][col - 1] + Parameters.gap,
            )

    if DEBUG:
        row_labels = "_" + seq1
        col_labels = "_" + seq2
        for title, table in (("\nIx", Ix), ("\nIy", Iy), ("\nM", M)):
            print(title)
            print_matrix(row_labels, col_labels, table)

    return M, Ix, Iy
def local_align(seq1, seq2, Parameters=Parameters()):
    """Fill the linear-gap local-alignment matrix and locate its best cell.

    Every interior cell is the maximum of: the left neighbour plus the gap
    penalty, the upper neighbour plus the gap penalty, the diagonal
    neighbour plus the substitution score, and 0.  Border cells are left as
    ``create_matrix`` produced them.

    Args:
        seq1: First sequence (indexed along rows).
        seq2: Second sequence (indexed along columns).
        Parameters: Object providing ``score(a, b)`` and ``gap``.

    Returns:
        ``(M, li, lj)`` where ``M`` is the filled matrix and ``(li, lj)`` is
        the position of the highest score.  Ties go to the cell visited
        last in row-major order; ``(0, 0)`` is returned when no interior
        cell reaches a score of at least 0 (e.g. an empty sequence).
    """
    M = create_matrix(len(seq1), len(seq2))
    best_score = 0
    best_i = 0
    best_j = 0

    # Row-major fill, so every neighbour is ready before it is read.
    for i, res1 in enumerate(seq1, start=1):
        for j, res2 in enumerate(seq2, start=1):
            from_left = M[i][j - 1] + Parameters.gap
            from_above = M[i - 1][j] + Parameters.gap
            from_diag = M[i - 1][j - 1] + Parameters.score(res1, res2)
            M[i][j] = max(from_left, from_above, from_diag, 0)

            # ">=" keeps the most recently visited cell among equal maxima.
            if M[i][j] >= best_score:
                best_score = M[i][j]
                best_i = i
                best_j = j

    return M, best_i, best_j
def traceback_left(M, Ix, Iy, seq1, seq2, Parameters):
    """Recover one alignment from affine-gap matrices, starting at the last cell.

    The walk starts at ``[len(seq1)][len(seq2)]`` in whichever matrix holds
    the largest value there (``M`` wins ties; ``Ix`` is chosen only when it
    is strictly greater than ``Iy``) and steps back until both indices are 0.

    In each state one alignment column is emitted and the next state is
    found by testing, with exact equality, which predecessor reproduces the
    current cell:

    * ``"M"``  -- pair ``seq1[i - 1]`` with ``seq2[j - 1]``; predecessors are
      tried in the order ``Ix``, ``M``, ``Iy``.
    * ``"Ix"`` -- ``seq1[i - 1]`` against ``"-"``; gap opening (from ``M``) is
      tried before gap extension.
    * ``"Iy"`` -- ``"-"`` against ``seq2[j - 1]``; gap opening is tried before
      gap extension.

    If no predecessor matches, the state is left unchanged.

    Args:
        M, Ix, Iy: Score matrices indexed ``[i][j]``.
        seq1: Sequence indexed by ``i`` (rows).
        seq2: Sequence indexed by ``j`` (columns).
        Parameters: Object providing ``score(a, b)``, ``gapopen`` and ``gap``.

    Returns:
        ``(alignedseq1, alignedseq2)``: two equal-length strings with ``"-"``
        marking gaps.
    """
    i = len(seq1)
    j = len(seq2)

    # Pick the starting matrix from the scores in the final cell.
    if M[i][j] >= Ix[i][j] and M[i][j] >= Iy[i][j]:
        matrix = "M"
    elif Ix[i][j] > Iy[i][j]:
        matrix = "Ix"
    else:
        matrix = "Iy"

    # Columns are collected back-to-front and reversed once at the end.
    columns1 = []
    columns2 = []

    # Compare by value: identity tests against an int literal ("is not 0")
    # are implementation-dependent and warn on modern Python.
    while i != 0 or j != 0:
        # On a border only one move is possible.  Forcing it keeps the walk
        # from reading seq[-1] and from driving an index below zero.
        if i == 0:
            matrix = "Iy"
        elif j == 0:
            matrix = "Ix"

        if matrix == "M":
            columns1.append(seq1[i - 1])
            columns2.append(seq2[j - 1])
            score = Parameters.score(seq1[i - 1], seq2[j - 1])
            if M[i][j] == Ix[i - 1][j - 1] + score:
                matrix = "Ix"
            elif M[i][j] == M[i - 1][j - 1] + score:
                matrix = "M"
            elif M[i][j] == Iy[i - 1][j - 1] + score:
                matrix = "Iy"
            i -= 1
            j -= 1
        elif matrix == "Ix":
            columns1.append(seq1[i - 1])
            columns2.append("-")
            if Ix[i][j] == M[i - 1][j] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Ix[i][j] == Ix[i - 1][j] + Parameters.gap:
                matrix = "Ix"
            i -= 1
        else:  # "Iy"
            columns1.append("-")
            columns2.append(seq2[j - 1])
            if Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Iy[i][j] == Iy[i][j - 1] + Parameters.gap:
                matrix = "Iy"
            j -= 1

    alignedseq1 = "".join(reversed(columns1))
    alignedseq2 = "".join(reversed(columns2))
    return alignedseq1, alignedseq2
elif Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap: matrix = "M" j = j - 1 #Revertendo a String alignedseq1 = alignedseq1[::-1] alignedseq2 = alignedseq2[::-1] return alignedseq1, alignedseq2 if __name__ == '__main__': #Definicoes dos parametros par = Parameters(gapopen=-10, gap=-0.5, matrix='BLOSUM62', stype='protein') #Sequencias seq1 = io.read_fasta(io.read_file("../inputs/default1.fasta")) seq2 = io.read_fasta(io.read_file("../inputs/default2.fasta")) #Matriz de apontadores M, Ix, Iy = global_align(seq1, seq2, par) #Sequencias alinhadas leftalignedseq1, leftalignedseq2 = traceback_left(M, Ix, Iy, seq1, seq2, par) upalignedseq1, upalignedseq2 = traceback_up(M, Ix, Iy, seq1, seq2, par) result = Alignment(leftalignedseq1, leftalignedseq2, "LEFT") result.calculate_mat_mis_gaps()
def traceback_left(M, Ix, Iy, seq1, seq2, Parameters, maxmatrix, li, lj):
    """Recover a local alignment from affine-gap matrices.

    The walk starts at cell ``[li][lj]`` of the matrix named by
    ``maxmatrix`` and steps back until the current cell holds 0 or a border
    makes the current move impossible.  The stop test runs *before* a column
    is emitted, so the cell that ends the walk contributes nothing to the
    result.

    States and their moves:

    * ``"M"``  -- pair ``seq1[i - 1]`` with ``seq2[j - 1]``; predecessors are
      tried in the order ``Ix``, ``M``, ``Iy``.
    * ``"Ix"`` -- ``seq1[i - 1]`` against ``"-"``; gap extension is tried
      before gap opening.
    * ``"Iy"`` -- ``"-"`` against ``seq2[j - 1]``; gap opening is tried before
      gap extension.

    Predecessors are recognised with exact equality; when none matches, the
    state is left unchanged.

    Args:
        M, Ix, Iy: Score matrices indexed ``[i][j]``.
        seq1: Sequence indexed by ``i`` (rows).
        seq2: Sequence indexed by ``j`` (columns).
        Parameters: Object providing ``score(a, b)``, ``gapopen`` and ``gap``.
        maxmatrix: ``"M"``, ``"Ix"`` or ``"Iy"`` -- matrix to start in.
        li: Row index of the starting cell.
        lj: Column index of the starting cell.

    Returns:
        ``(alignedseq1, alignedseq2)``: two equal-length strings with ``"-"``
        marking gaps.

    Raises:
        ValueError: If ``maxmatrix`` is not one of the three state names.
    """
    if maxmatrix not in ("M", "Ix", "Iy"):
        raise ValueError("maxmatrix must be 'M', 'Ix' or 'Iy'")

    i = li
    j = lj
    matrix = maxmatrix

    # Columns are collected back-to-front and reversed once at the end.
    columns1 = []
    columns2 = []

    while True:
        if matrix == "M":
            # A pairing needs a residue from both sequences; a zero score
            # marks the start of the local alignment.
            if i == 0 or j == 0 or M[i][j] == 0:
                break
            columns1.append(seq1[i - 1])
            columns2.append(seq2[j - 1])
            score = Parameters.score(seq1[i - 1], seq2[j - 1])
            if M[i][j] == Ix[i - 1][j - 1] + score:
                matrix = "Ix"
            elif M[i][j] == M[i - 1][j - 1] + score:
                matrix = "M"
            elif M[i][j] == Iy[i - 1][j - 1] + score:
                matrix = "Iy"
            i -= 1
            j -= 1
        elif matrix == "Ix":
            if i == 0 or Ix[i][j] == 0:
                break
            columns1.append(seq1[i - 1])
            columns2.append("-")
            if Ix[i][j] == Ix[i - 1][j] + Parameters.gap:
                matrix = "Ix"
            elif Ix[i][j] == M[i - 1][j] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            i -= 1
        else:  # "Iy"
            if j == 0 or Iy[i][j] == 0:
                break
            columns1.append("-")
            columns2.append(seq2[j - 1])
            if Iy[i][j] == M[i][j - 1] + Parameters.gapopen + Parameters.gap:
                matrix = "M"
            elif Iy[i][j] == Iy[i][j - 1] + Parameters.gap:
                matrix = "Iy"
            j -= 1

    alignedseq1 = "".join(reversed(columns1))
    alignedseq2 = "".join(reversed(columns2))
    return alignedseq1, alignedseq2
def local_align(seq1, seq2, Parameters=Parameters()):
    """Fill the affine-gap local-alignment matrices and locate the best cell.

    Three matrices are obtained from ``create_matrix(len(seq1), len(seq2))``
    (presumably sized ``(len(seq1) + 1) x (len(seq2) + 1)``, since cells up
    to ``[len(seq1)][len(seq2)]`` are written -- confirm against the helper):

    * ``M``  -- alignments whose last column pairs ``seq1[i - 1]`` with
      ``seq2[j - 1]``, clamped at 0 so an alignment may start anywhere;
    * ``Ix`` -- alignments whose last column consumes ``seq1[i - 1]`` only;
    * ``Iy`` -- alignments whose last column consumes ``seq2[j - 1]`` only.

    Border handling: the first row and column of ``M`` are 0 and those of
    ``Ix`` / ``Iy`` are ``-INF``.  A local alignment can therefore begin at
    any residue, including those in the first row or column, and can never
    begin with a gap.  (Global-style borders, ``M = -INF`` with penalised
    ``Ix`` / ``Iy``, would make a match at ``[i][1]`` or ``[1][j]`` score
    as if it had to pay for a leading gap.)

    Args:
        seq1: First sequence (indexed along rows).
        seq2: Second sequence (indexed along columns).
        Parameters: Object providing ``score(a, b)``, ``gapopen`` and ``gap``.

    Returns:
        ``(M, Ix, Iy, maxmatrix, li, lj)``: the three matrices, the name
        (``"M"``, ``"Ix"`` or ``"Iy"``) of the matrix holding the highest
        score, and that cell's row and column.  Ties go to the cell visited
        last.
    """
    n_rows = len(seq1)
    n_cols = len(seq2)
    M = create_matrix(n_rows, n_cols)
    Ix = create_matrix(n_rows, n_cols)
    Iy = create_matrix(n_rows, n_cols)

    # Local-alignment borders: free start in M, no alignment may open with
    # a gap.
    for j in range(n_cols + 1):
        M[0][j] = 0
        Ix[0][j] = -INF
        Iy[0][j] = -INF
    for i in range(1, n_rows + 1):
        M[i][0] = 0
        Ix[i][0] = -INF
        Iy[i][0] = -INF

    # Running maximum over all three matrices.
    maxmatrix = "M"
    max_score = 0
    li = 0
    lj = 0

    # Affine-gap recurrence.
    for i in range(1, n_rows + 1):
        for j in range(1, n_cols + 1):
            score = Parameters.score(seq1[i - 1], seq2[j - 1])
            diag = M[i - 1][j - 1] + score
            esq = Ix[i - 1][j - 1] + score
            cim = Iy[i - 1][j - 1] + score

            M[i][j] = max(esq, diag, cim, 0)
            if M[i][j] >= max_score:
                max_score = M[i][j]
                li = i
                lj = j
                maxmatrix = "M"

            # The additions are kept in this exact left-to-right order: the
            # traceback compares cells with ``==`` against the same
            # expression, so regrouping could break it for float penalties.
            Ix[i][j] = max(
                M[i - 1][j] + Parameters.gapopen + Parameters.gap,
                Ix[i - 1][j] + Parameters.gap,
            )
            if Ix[i][j] >= max_score:
                max_score = Ix[i][j]
                li = i
                lj = j
                maxmatrix = "Ix"

            Iy[i][j] = max(
                M[i][j - 1] + Parameters.gapopen + Parameters.gap,
                Iy[i][j - 1] + Parameters.gap,
            )
            if Iy[i][j] >= max_score:
                max_score = Iy[i][j]
                li = i
                lj = j
                maxmatrix = "Iy"

    if DEBUG:
        print("\nIx")
        print_matrix("_" + seq1, "_" + seq2, Ix)
        print("\nIy")
        print_matrix("_" + seq1, "_" + seq2, Iy)
        print("\nM")
        print_matrix("_" + seq1, "_" + seq2, M)

    return M, Ix, Iy, maxmatrix, li, lj
def traceback(M, seq1, seq2, i, j, Parameters):
    """Recover a local alignment from a linear-gap score matrix.

    Starting at cell ``[i][j]``, the walk steps back until it reaches a cell
    whose score is 0.  ``i`` indexes ``seq1`` (rows of ``M``) and ``j``
    indexes ``seq2`` (columns of ``M``).  At each cell the predecessor is
    found with exact equality, tried in this order:

    1. up    -- ``M[i - 1][j] + gap``: emit ``seq1[i - 1]`` against ``"-"``;
    2. diag  -- ``M[i - 1][j - 1] + score(...)``: emit ``seq1[i - 1]``
       against ``seq2[j - 1]``;
    3. left  -- ``M[i][j - 1] + gap``: emit ``"-"`` against ``seq2[j - 1]``.

    On the first row only the left move is possible and on the first column
    only the up move is; these cases are handled before any neighbour is
    read so that ``M[i - 1]`` / ``M[i][j - 1]`` never wrap around to the far
    side of the matrix.

    Args:
        M: Score matrix indexed ``[i][j]``.
        seq1: Sequence indexed by ``i``.
        seq2: Sequence indexed by ``j``.
        i: Row index of the starting cell.
        j: Column index of the starting cell.
        Parameters: Object providing ``score(a, b)`` and ``gap``.

    Returns:
        ``(alignedseq1, alignedseq2)``: two equal-length strings with ``"-"``
        marking gaps.

    Raises:
        ValueError: If an interior cell cannot be derived from any of its
            three neighbours (the matrix does not match ``Parameters``);
            without this the walk would never terminate.
    """
    # Columns are collected back-to-front and reversed once at the end.
    columns1 = []
    columns2 = []

    while M[i][j] != 0:
        if i == 0 and j == 0:
            # Nothing left to consume in either sequence.
            break

        if i == 0:
            move = "left"
        elif j == 0:
            move = "up"
        elif M[i][j] == M[i - 1][j] + Parameters.gap:
            move = "up"
        elif M[i][j] == M[i - 1][j - 1] + Parameters.score(
                seq1[i - 1], seq2[j - 1]):
            move = "diag"
        elif M[i][j] == M[i][j - 1] + Parameters.gap:
            move = "left"
        else:
            raise ValueError(
                "cell (%d, %d) cannot be derived from its neighbours" % (i, j))

        if move == "up":
            columns1.append(seq1[i - 1])
            columns2.append("-")
            i -= 1
        elif move == "diag":
            columns1.append(seq1[i - 1])
            columns2.append(seq2[j - 1])
            i -= 1
            j -= 1
        else:  # "left"
            columns1.append("-")
            columns2.append(seq2[j - 1])
            j -= 1

    alignedseq1 = "".join(reversed(columns1))
    alignedseq2 = "".join(reversed(columns2))
    return alignedseq1, alignedseq2
#Cima if M[i][j] == M[i][j - 1] + Parameters.gap: #if M[i][j-1] is not 0: alignedseq1 = alignedseq1 + '-' alignedseq2 = alignedseq2 + seq2[j - 1] j = j - 1 continue #Revertendo a String alignedseq1 = alignedseq1[::-1] alignedseq2 = alignedseq2[::-1] return alignedseq1, alignedseq2 if __name__ == '__main__': #Definicoes dos parametros par = Parameters(gap=-10, matrix='BLOSUM62', stype='dna') seq1 = io.read_fasta(io.read_file("../inputs/default1.fasta")) seq2 = io.read_fasta(io.read_file("../inputs/default2.fasta")) matrix, i, j = local_align(seq1, seq2, par) alignedseq1, alignedseq2 = traceback(matrix, seq1, seq2, i, j, par) result = Alignment(alignedseq1, alignedseq2, "DEFAULT") result.calculate_mat_mis_gaps() io.write_file("../outputs/locally_local_linear_output.txt", str(result))