Пример #1
0
def globalAlignment(seq1, seq2, matchScore, misMatchScore, gapScore):
	matchScore = int(matchScore)
	misMatchScore = int(misMatchScore)
	gapScore = int(gapScore)
	
	#File input
	file1 = open(fileA, 'r')
	file2 = open(fileB, 'r')
	readLines1 = file1.readlines()
	readLines2 = file2.readlines()
	linesA = utils.getArraysFromFile(readLines1)
	linesB = utils.getArraysFromFile(readLines2)
	
	'''Returns the optimal alignment of all sequences located in a fafsta file.
		ie if there are four sequences on one file and 5 on the other,
		this will find the alignments of all 20 combinations. This an array containing
		each of the sequences, the score, and the percent correct.'''
	ret = utils.initialize2DArray(len(linesA)*len(linesB), 5, 0)
	retIndex = 0
	idInfo1 = utils.getSequenceInfo(readLines1)
	idInfo2 = utils.getSequenceInfo(readLines2)
	for i in range(0, len(linesA)):
		for j in range(0, len(linesB)):
			info = getAlignment(linesA[i], linesB[j], matchScore, misMatchScore, gapScore)
			#alignments
			ret[retIndex][0] = info[0]
			ret[retIndex][1] = info[1]
			#score
			ret[retIndex][2] = info[2]
			#percent correct
			ret[retIndex][3] = idInfo1[i]
			ret[retIndex][4] = idInfo2[j]
			retIndex += 1
	file1.close()
	file2.close()
	return ret
Пример #2
0
def getAlignment(horzChars, vertChars, matchScore, misMatchScore, gapScore):
	scores = utils.initialize2DArray(len(horzChars)+1, len(vertChars)+1, 0)
	pointers = utils.initialize2DArray(len(horzChars)+1, len(vertChars)+1, [-float('inf'),-float('inf')])
	dirs = utils.initialize2DArray(len(horzChars)+1, len(vertChars)+1, -float('inf'))
	#print scores
	#sets the initial gap scores
	for i in range(0, len(horzChars)+1):
		scores[i][0] = i*gapScore
	for i in range(1, len(vertChars)+1):
		scores[0][i] = i*gapScore

	for i in range(1, len(horzChars)+1):
		for j in range(1, len(vertChars)+1):
			if horzChars[i-1] == vertChars[j-1]:
				scores[i][j] = utils.maxVal(scores[i-1][j-1]+matchScore, scores[i-1][j]+gapScore, scores[i][j-1]+gapScore)
				direc = utils.maxForPointer(scores[i-1][j-1]+matchScore, scores[i-1][j]+gapScore, scores[i][j-1]+gapScore)
			else:
				scores[i][j] = utils.maxVal(scores[i-1][j-1]+misMatchScore, scores[i-1][j]+gapScore, scores[i][j-1]+gapScore)
				direc = utils.maxForPointer(scores[i-1][j-1]+misMatchScore, scores[i-1][j]+gapScore, scores[i][j-1]+gapScore)
			
			if direc == 0:
				pointers[i][j] = [i-1,j-1]
				dirs[i][j] = direc
			elif direc == 1:
				pointers[i][j] = [i-1,j]
				dirs[i][j] = direc
			else:
				pointers[i][j] = [i, j-1]
				dirs[i][j] = direc

	maxScore = scores[len(horzChars)][len(vertChars)]
	pt = [len(horzChars),len(vertChars)]

	#traceback
	horzOut = []
	vertOut = []

	oldPt = pt
	while(pt[0]!= -float('inf') or pt[1]!= -float('inf')):
		if dirs[pt[0]][pt[1]] == 0:
			horzOut.insert(0, horzChars[pt[0]-1])
			vertOut.insert(0, vertChars[pt[1]-1])
		elif dirs[pt[0]][pt[1]] == 1:
			vertOut.insert(0, '-');
			horzOut.insert(0, horzChars[pt[0]-1])
		elif dirs[pt[0]][pt[1]] == 2:
			horzOut.insert(0, '-');
			vertOut.insert(0, vertChars[pt[1]-1])
		oldPt = pt
		pt = pointers[pt[0]][pt[1]]

	#printing indels on edge
	pt = oldPt
	if pt[0]!=0 or pt[1]!=0:
		#in vertical column
		if pt[1]!=0:
			numIndels = scores[0][pt[1]]/(gapScore)
			for i in range(0, numIndels):
				horzOut.insert(0, '-')
			while(pt[1]!=0):
				vertOut.insert(0, vertChars[pt[1]-1])
				pt[1] = pt[1]-1;
		elif pt[0] != 0:
			numIndels = scores[pt[0]][0]/(gapScore)
			for i in range(0, numIndels):
				vertOut.insert(0, '-');
			while(pt[0]!=0):
				horzOut.insert(0, horzChars[pt[0]-1])
				pt[0] = pt[0]-1;
			
	print "Score: " + str(maxScore)	
	#utils.printArray(horzOut)
	#utils.printArray(vertOut)
	#print '\n'
	return maxScore