def createShingles(inputDocument):
    """Print every fixed-width shingle of ``inputDocument`` and its hash.

    A shingle is a contiguous slice of ``shingleLength`` items;
    ``shingleLength`` is read from module scope (it is defined outside this
    block). The slices are taken at every start offset for which a
    full-width slice still fits inside the document.

    Args:
        inputDocument: A sliceable sequence with a length (for example a
            string).

    Returns:
        None. The shingles and their hashes are only printed, through
        ``Common.printVector``.

    Note:
        Hashes come from the built-in ``hash``. For ``str``/``bytes`` slices
        these values are salted per interpreter run unless ``PYTHONHASHSEED``
        is fixed, so the printed hashes are not stable across runs.
    """
    total = len(inputDocument)
    pieces = []
    digests = []

    start = 0
    # A slice's end index is exclusive, so ``start + shingleLength == total``
    # is still a valid (last) window.
    while start + shingleLength <= total:
        piece = inputDocument[start:start + shingleLength]
        pieces.append(piece)
        digests.append(hash(piece))
        start += 1

    Common.printVector(pieces)
    Common.printVector(digests)
import Common

# Dimension vector: matrix number t (0-based) has shape p[t] x p[t + 1],
# which is how the cost term p[i] * p[k + 1] * p[j + 1] below reads it.
p = [5, 4, 6, 2, 7]
# Number of matrices described by p.
length = len(p) - 1
# m[i][j]: cheapest multiplication cost found for the chain i..j.
m = [[0] * length for _ in range(length)]
# s[i][j]: split index k that produced m[i][j].
s = [[0] * length for _ in range(length)]


def computeMatrix():
    """Fill the cost table ``m`` and the split table ``s`` bottom-up.

    Chains are processed by increasing span (``j - i``). For each chain
    ``i..j`` every split point ``k`` in ``[i, j)`` is tried using the
    matrix-chain-order recurrence
    ``m[i][k] + m[k + 1][j] + p[i] * p[k + 1] * p[j + 1]``.

    A stored value of 0 is treated as "not computed yet", so the first
    candidate always wins and later ones must be strictly cheaper.
    Progress is traced with ``print``. Nothing is returned; the
    module-level ``m`` and ``s`` are updated in place.
    """
    for span in range(length):
        print("Current Length={0}".format(span))
        for lo in range(length - span):
            hi = lo + span
            print("i={0},j={1}".format(lo, hi))
            for split in range(lo, hi):
                print(" k={0}".format(split))
                cost = (
                    m[lo][split]
                    + m[split + 1][hi]
                    + p[lo] * p[split + 1] * p[hi + 1]
                )
                # Keep the existing entry when it is set and not beaten.
                if m[lo][hi] != 0 and cost >= m[lo][hi]:
                    continue
                m[lo][hi] = cost
                s[lo][hi] = split


Common.printVector(p, label="Vector p")
computeMatrix()
Common.printMatrix(m, label="Optimal Cost", r="Row Size", c="Column Size")
Common.printMatrix(s, label="Partition", r="Row Size", c="Column Size")
# NOTE(review): the loop below is the tail of a definition whose header lies
# outside this view (presumably the body of ``computeMatrix`` inside a loop
# over ``row`` -- confirm and restore the enclosing indentation). Its code is
# carried over unchanged.
# NOTE(review): in the final ``else`` branch ``m[row - 1]`` is evaluated even
# when ``row == 0``; if ``m`` is a list this reads the LAST row through
# negative indexing -- confirm that is intended.
for column in range(0, columns):
    currentRowNumber = referenceNumbers[row]
    currentColumnSum = column + 1
    if currentColumnSum < currentRowNumber:
        if row == 0:
            m[row][column] = False
        else:
            # Copy previous row value
            m[row][column] = m[row - 1][column]
    elif currentColumnSum == currentRowNumber:
        m[row][column] = True
    else:
        m[row][column] = m[row - 1][column] or m[row - 1][column - currentRowNumber]


def backTrack():
    """Collect into ``backtrack`` the numbers that explain the last cell of ``m``.

    Starts at the bottom-right cell. While the current cell equals the cell
    directly above it (the first row is compared against ``False``), the walk
    moves up one row. Otherwise ``referenceNumbers[row]`` is prepended to the
    module-level ``backtrack`` list and the walk moves left by that amount.
    The walk ends once either index drops below zero. Returns nothing.
    """
    r = rows - 1
    c = columns - 1
    while r >= 0 and c >= 0:
        here = m[r][c]
        above = m[r - 1][c] if r > 0 else False
        if here != above:
            # This row's number changed the answer, so it is part of the result.
            picked = referenceNumbers[r]
            backtrack.insert(0, picked)
            c -= picked
        else:
            r -= 1


Common.printVector(referenceNumbers, label="Vector referenceNumbers")
computeMatrix()
Common.printMatrix(m, label="Matrix", r="Row Size", c="Column Size")
backTrack()
Common.printVector(backtrack, label="Backtrack numbers")
# NOTE(review): the two lines below are the tail of a definition that begins
# outside this view (presumably an indented branch of ``longestCommon`` --
# confirm and restore its indentation). The inherited comment mentions a
# maximum, but the statement stores the constant 0.
# Max of previous column or row
m[row][column] = 0


def backtrack():
    """Read the longest diagonal run out of ``m`` into ``seq``.

    First scans rows ``1 .. rows - 1`` for the largest table value, keeping
    the first row that strictly exceeds the running maximum and the first
    column holding it in that row. Then follows the diagonal up and to the
    left for as long as each step decreases the value by exactly one,
    prepending ``word1[column - 1]`` to the module-level ``seq`` each time.
    Returns nothing.
    """
    best = 0
    bestRow = 0
    bestCol = 0
    for r in range(1, rows):
        peak = max(m[r])
        if peak > best:
            best, bestRow, bestCol = peak, r, m[r].index(peak)

    while bestRow > 0 and bestCol > 0:
        upLeft = m[bestRow - 1][bestCol - 1]
        if upLeft != best - 1:
            # The diagonal run is over.
            break
        seq.insert(0, word1[bestCol - 1])
        best = upLeft
        bestRow -= 1
        bestCol -= 1


Common.printVector(word1, label="word1")
Common.printVector(word2, label="word2")
longestCommon()
Common.printMatrix(m, r="word2", c="word1")
backtrack()
Common.printVector(seq, label="Common Substring")
# NOTE(review): the ``if``/``else`` below is the tail of a definition whose
# header lies outside this view (presumably the inner loop body of
# ``commonSubsequence`` -- confirm and restore the enclosing indentation).
# Its code is carried over unchanged.
if char1 == char2:
    # Diagonal + 1
    diagonal = m[row - 1][column - 1]
    m[row][column] = diagonal + 1
else:
    # Max of previous column or row
    m[row][column] = max(m[row][column - 1], m[row - 1][column])


def backtrack():
    """Trace one common subsequence out of ``m`` into ``seq``, in word order.

    Starts at the bottom-right cell of ``m`` and walks towards the top-left
    until either index reaches 0:

    * if the cell equals its left neighbour, move left;
    * otherwise, if it equals the cell above, move up;
    * otherwise the value came from the diagonal, so ``word1[column - 1]``
      belongs to the subsequence and the walk moves diagonally.

    Because the walk runs from the end of the words to the start, each
    character is prepended to the module-level ``seq`` so that the finished
    list reads front to back. Appending instead would leave it reversed.
    Returns nothing.
    """
    column = columns - 1
    row = rows - 1
    while column > 0 and row > 0:
        current = m[row][column]
        if current == m[row][column - 1]:
            column -= 1
        elif current == m[row - 1][column]:
            row -= 1
        else:
            # In a table built by the recurrence above the only remaining
            # case is ``current == diagonal + 1``. A plain ``else`` also
            # guarantees the loop always makes progress.
            seq.insert(0, word1[column - 1])
            column -= 1
            row -= 1


Common.printVector(word1, label="word1")
Common.printVector(word2, label="word2")
commonSubsequence()
Common.printMatrix(m, r="word2", c="word1")
backtrack()
Common.printVector(seq, label="Common Subsequence")