def normalizeOutput(self, o: Vector) -> Vector:
    """
    Exponentiates every element of o and divides each exponential by the sum of all of them, so the
    returned values add up to 1 (a softmax-style normalization). Elements larger than 500 are treated
    as 500 before exponentiation.

    PARAMETERS
    ----------
    o : Vector
        Vector to normalize.

    RETURNS
    -------
    Vector
        New Vector holding the normalized values; o itself is not modified.
    """
    # Exponent is capped at 500 -- presumably to keep math.exp away from its overflow range.
    exponentials = []
    for index in range(o.size()):
        element = o.getValue(index)
        exponentials.append(math.exp(500 if element > 500 else element))
    # Plain left-to-right accumulation keeps the floating point result identical to a running total.
    denominator = 0.0
    for term in exponentials:
        denominator += term
    return Vector([term / denominator for term in exponentials])
def viterbi(self, s: list) -> list:
    """
    viterbi calculates the most probable state sequence for a list of observed symbols. Scores are
    kept per PAIR of consecutive states: column (a * stateCount + b) of gamma/phi stands for
    "state a followed by state b".

    An empty observation list yields an empty state sequence. A list with exactly one symbol is not
    supported: the initialisation step reads s[1], so an IndexError is raised in that case.

    PARAMETERS
    ----------
    s : list
        Observed symbols, in order.

    RETURNS
    -------
    list
        The most probable state sequence, one state per observed symbol.
    """
    sequenceLength = len(s)
    if sequenceLength == 0:
        # Nothing observed, nothing to decode (also avoids indexing s[0] below).
        return []
    pairCount = self.stateCount * self.stateCount
    gamma = Matrix(sequenceLength, pairCount)
    phi = Matrix(sequenceLength, pairCount)
    qs = Vector(sequenceLength, 0)
    emission1 = s[0]
    emission2 = s[1]
    # Initialisation: row 1 of gamma scores every (first state, second state) pair using the pair
    # scores stored in pi and the emission probabilities of the first two symbols.
    for i in range(self.stateCount):
        for j in range(self.stateCount):
            observationLikelihood = self.states[i].getEmitProb(emission1) * \
                                    self.states[j].getEmitProb(emission2)
            gamma.setValue(1, i * self.stateCount + j,
                           self.safeLog(self.__pi.getValue(i, j)) + self.safeLog(observationLikelihood))
    # Recursion over the remaining symbols.
    for t in range(2, sequenceLength):
        emission = s[t]
        # Row t - 1 is not written during this step, so it is fetched once per t.
        previousRow = gamma.getRowVector(t - 1)
        for j in range(pairCount):
            current = self.__logOfColumn(j)
            # Presumably picks the entries of the previous row whose pair ends in state
            # j // stateCount (every stateCount-th entry starting there) -- verify against Vector.
            previous = previousRow.skipVector(self.stateCount, j // self.stateCount)
            current.addVector(previous)
            maxIndex = current.maxIndex()
            # j % stateCount is the state that emits the symbol at time t.
            observationLikelihood = self.states[j % self.stateCount].getEmitProb(emission)
            gamma.setValue(t, j, current.getValue(maxIndex) + self.safeLog(observationLikelihood))
            # Back-pointer: the best predecessor pair (maxIndex, j // stateCount).
            phi.setValue(t, j, maxIndex * self.stateCount + j // self.stateCount)
    # Back-trace. States are collected from last to first and reversed once at the end, instead of
    # inserting each one at the front of the list.
    qs.setValue(sequenceLength - 1, gamma.getRowVector(sequenceLength - 1).maxIndex())
    result = [self.states[int(qs.getValue(sequenceLength - 1)) % self.stateCount].getState()]
    for i in range(sequenceLength - 2, 0, -1):
        qs.setValue(i, phi.getValue(i + 1, int(qs.getValue(i + 1))))
        result.append(self.states[int(qs.getValue(i)) % self.stateCount].getState())
    # The first state is the leading half of the pair stored for position 1.
    result.append(self.states[int(qs.getValue(1)) // self.stateCount].getState())
    result.reverse()
    return result
def multiplyWithVectorFromRight(self, v: Vector) -> Vector:
    """
    Multiplies this matrix with the given Vector placed on the right-hand side: entry i of the result
    is the sum over j of v[j] * values[i][j].

    PARAMETERS
    ----------
    v : Vector
        Vector to multiply with; its size must equal the column count of the matrix.

    RETURNS
    -------
    Vector
        New Vector with one entry per matrix row.

    RAISES
    ------
    MatrixColumnMismatch
        If the column count of the matrix differs from the size of v.
    """
    if self.__col != v.size():
        raise MatrixColumnMismatch
    product = Vector()
    for rowIndex in range(self.__row):
        # Dot product of row rowIndex with v, accumulated left to right.
        accumulated = 0.0
        for colIndex in range(self.__col):
            accumulated += v.getValue(colIndex) * self.__values[rowIndex][colIndex]
        product.add(accumulated)
    return product
def addVectorAttribute(self, vector: Vector):
    """
    Appends one ContinuousAttribute per element of the given Vector to the attribute list, keeping
    the element order.

    PARAMETERS
    ----------
    vector : Vector
        Vector whose values become the new continuous attributes.
    """
    self.__attributes.extend(
        ContinuousAttribute(vector.getValue(position))
        for position in range(vector.size())
    )
def viterbi(self, s: list) -> list:
    """
    viterbi calculates the most probable state sequence for a list of observed symbols. All scores
    are kept in log space (via safeLog). An empty observation list yields an empty state sequence.

    PARAMETERS
    ----------
    s : list
        Observed symbols, in order.

    RETURNS
    -------
    list
        The most probable state sequence, one state per observed symbol.
    """
    sequenceLength = len(s)
    if sequenceLength == 0:
        # Nothing observed, nothing to decode (also avoids indexing s[0] below).
        return []
    # gamma[t][j]: best log score of any path that ends in state j at time t.
    # phi[t][j]:   the state at time t - 1 on that best path (back-pointer).
    gamma = Matrix(sequenceLength, self.stateCount)
    phi = Matrix(sequenceLength, self.stateCount)
    qs = Vector(sequenceLength, 0)
    # Initialisation with the pi entry of each state and the emission of the first symbol.
    emission = s[0]
    for i in range(self.stateCount):
        observationLikelihood = self.states[i].getEmitProb(emission)
        gamma.setValue(0, i, self.safeLog(self.__pi.getValue(i)) + self.safeLog(observationLikelihood))
    # Recursion over the remaining symbols.
    for t in range(1, sequenceLength):
        emission = s[t]
        # Row t - 1 is not written during this step, so it is fetched once per t.
        previous = gamma.getRowVector(t - 1)
        for j in range(self.stateCount):
            tempArray = self.__logOfColumn(j)
            tempArray.addVector(previous)
            maxIndex = tempArray.maxIndex()
            observationLikelihood = self.states[j].getEmitProb(emission)
            gamma.setValue(t, j, tempArray.getValue(maxIndex) + self.safeLog(observationLikelihood))
            phi.setValue(t, j, maxIndex)
    # Back-trace. States are collected from last to first and reversed once at the end, instead of
    # inserting each one at the front of the list.
    qs.setValue(sequenceLength - 1, gamma.getRowVector(sequenceLength - 1).maxIndex())
    result = [self.states[int(qs.getValue(sequenceLength - 1))].getState()]
    for i in range(sequenceLength - 2, -1, -1):
        qs.setValue(i, phi.getValue(i + 1, int(qs.getValue(i + 1))))
        result.append(self.states[int(qs.getValue(i))].getState())
    result.reverse()
    return result
def addRowVector(self, rowNo: int, v: Vector):
    """
    Adds the given Vector element by element onto one row of the matrix, in place.

    PARAMETERS
    ----------
    rowNo : int
        Index of the row to update.
    v : Vector
        Vector to add; its size must equal the column count of the matrix.

    RAISES
    ------
    MatrixColumnMismatch
        If the column count of the matrix differs from the size of v.
    """
    if self.__col != v.size():
        raise MatrixColumnMismatch
    for column in range(self.__col):
        targetRow = self.__values[rowNo]
        targetRow[column] += v.getValue(column)
class Hmm1(Hmm):
    """
    Hidden Markov model in which every transition depends on a single previous state: pi is a
    Vector indexed by state and the transition matrix is stateCount x stateCount.
    """

    # Initial-state scores, one entry per state; filled by calculatePi.
    __pi: Vector

    def __init__(self, states: set, observations: list, emittedSymbols: list):
        """
        A constructor of Hmm1 class which takes a set of states, a list of observations (each one a
        list of states) and a list of emitted symbol sequences, and forwards all of them to the Hmm
        base constructor.

        PARAMETERS
        ----------
        states : set
            A set of states, consisting of all possible states for this problem.
        observations : list
            A list of instances, where each instance consists of a list of states.
        emittedSymbols : list
            A list of instances, where each instance consists of a list of symbols.
        """
        # NOTE(review): the base constructor is not visible here; presumably it is what triggers
        # calculatePi / calculateTransitionProbabilities -- confirm against Hmm.
        super().__init__(states, observations, emittedSymbols)

    def calculatePi(self, observations: list):
        """
        calculatePi calculates the prior probability vector (initial probabilities for each state)
        from a set of observations. The first state of every observation is counted, and the counts
        are then L1-normalized.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the prior probabilities. Every observation is
            indexed at position 0, so none of them may be empty.
        """
        self.__pi = Vector()
        self.__pi.initAllSame(self.stateCount, 0.0)
        for observation in observations:
            index = self.stateIndexes[observation[0]]
            self.__pi.addValue(index, 1.0)
        self.__pi.l1Normalize()

    def calculateTransitionProbabilities(self, observations: list):
        """
        calculateTransitionProbabilities calculates the transition probabilities matrix from each
        state to another state. Every pair of consecutive states in every observation increments the
        (from, to) cell; the counts are then normalized with Matrix.columnWiseNormalize.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the transition probabilities.
        """
        self.transitionProbabilities = Matrix(self.stateCount, self.stateCount)
        for current in observations:
            for j in range(len(current) - 1):
                fromIndex = self.stateIndexes[current[j]]
                toIndex = self.stateIndexes[current[j + 1]]
                self.transitionProbabilities.increment(fromIndex, toIndex)
        self.transitionProbabilities.columnWiseNormalize()

    def __logOfColumn(self, column: int) -> Vector:
        """
        logOfColumn calculates the logarithm (via safeLog) of each value in a specific column of the
        transition probability matrix.

        PARAMETERS
        ----------
        column : int
            Column index of the transition probability matrix.

        RETURNS
        -------
        Vector
            A new vector with one entry per state: the logarithm of the value in row i of the given
            column.
        """
        result = Vector()
        for i in range(self.stateCount):
            result.add(self.safeLog(self.transitionProbabilities.getValue(i, column)))
        return result

    def viterbi(self, s: list) -> list:
        """
        viterbi calculates the most probable state sequence for a list of observed symbols. All
        scores are kept in log space (via safeLog). An empty observation list yields an empty state
        sequence.

        PARAMETERS
        ----------
        s : list
            Observed symbols, in order.

        RETURNS
        -------
        list
            The most probable state sequence, one state per observed symbol.
        """
        sequenceLength = len(s)
        if sequenceLength == 0:
            # Nothing observed, nothing to decode (also avoids indexing s[0] below).
            return []
        # gamma[t][j]: best log score of any path that ends in state j at time t.
        # phi[t][j]:   the state at time t - 1 on that best path (back-pointer).
        gamma = Matrix(sequenceLength, self.stateCount)
        phi = Matrix(sequenceLength, self.stateCount)
        qs = Vector(sequenceLength, 0)
        # Initialisation with the prior of each state and the emission of the first symbol.
        emission = s[0]
        for i in range(self.stateCount):
            observationLikelihood = self.states[i].getEmitProb(emission)
            gamma.setValue(0, i, self.safeLog(self.__pi.getValue(i)) + self.safeLog(observationLikelihood))
        # Recursion over the remaining symbols.
        for t in range(1, sequenceLength):
            emission = s[t]
            # Row t - 1 is not written during this step, so it is fetched once per t.
            previous = gamma.getRowVector(t - 1)
            for j in range(self.stateCount):
                tempArray = self.__logOfColumn(j)
                tempArray.addVector(previous)
                maxIndex = tempArray.maxIndex()
                observationLikelihood = self.states[j].getEmitProb(emission)
                gamma.setValue(t, j, tempArray.getValue(maxIndex) + self.safeLog(observationLikelihood))
                phi.setValue(t, j, maxIndex)
        # Back-trace. States are collected from last to first and reversed once at the end, instead
        # of inserting each one at the front of the list.
        qs.setValue(sequenceLength - 1, gamma.getRowVector(sequenceLength - 1).maxIndex())
        result = [self.states[int(qs.getValue(sequenceLength - 1))].getState()]
        for i in range(sequenceLength - 2, -1, -1):
            qs.setValue(i, phi.getValue(i + 1, int(qs.getValue(i + 1))))
            result.append(self.states[int(qs.getValue(i))].getState())
        result.reverse()
        return result
class VectorTest(unittest.TestCase):
    """
    Unit tests for Vector. setUp builds fresh fixtures before every test, so tests mutate them
    freely and never need to undo their changes.
    """

    # Shared source values; fixtures are built from copies so this list is never mutated.
    data1 = [2, 3, 4, 5, 6]

    def setUp(self):
        """Creates two 5-element vectors and two 1000-element vectors (1..1000 and 1000..1)."""
        data2 = [8, 7, 6, 5, 4]
        self.smallVector1 = Vector(list(self.data1))
        self.smallVector2 = Vector(data2)
        self.largeVector1 = Vector(list(range(1, 1001)))
        self.largeVector2 = Vector(list(range(1000, 0, -1)))

    def test_Biased(self):
        biased = self.smallVector1.biased()
        self.assertEqual(1, biased.getValue(0))
        self.assertEqual(self.smallVector1.size() + 1, biased.size())

    def test_ElementAdd(self):
        self.smallVector1.add(7)
        self.assertEqual(7, self.smallVector1.getValue(5))
        self.assertEqual(6, self.smallVector1.size())

    def test_Insert(self):
        self.smallVector1.insert(3, 6)
        self.assertEqual(6, self.smallVector1.getValue(3))
        self.assertEqual(6, self.smallVector1.size())

    def test_Remove(self):
        self.smallVector1.remove(2)
        self.assertEqual(5, self.smallVector1.getValue(2))
        self.assertEqual(4, self.smallVector1.size())

    def test_SumOfElementsSmall(self):
        self.assertEqual(20, self.smallVector1.sumOfElements())
        self.assertEqual(30, self.smallVector2.sumOfElements())

    def test_SumOfElementsLarge(self):
        self.assertEqual(20, self.smallVector1.sumOfElements())
        self.assertEqual(30, self.smallVector2.sumOfElements())
        self.assertEqual(500500, self.largeVector1.sumOfElements())
        self.assertEqual(500500, self.largeVector2.sumOfElements())

    def test_MaxIndex(self):
        self.assertEqual(4, self.smallVector1.maxIndex())
        self.assertEqual(0, self.smallVector2.maxIndex())

    def test_Sigmoid(self):
        smallVector3 = Vector(list(self.data1))
        smallVector3.sigmoid()
        self.assertAlmostEqual(0.8807971, smallVector3.getValue(0), 6)
        self.assertAlmostEqual(0.9975274, smallVector3.getValue(4), 6)

    def test_SkipVectorSmall(self):
        smallVector3 = self.smallVector1.skipVector(2, 0)
        self.assertEqual(2, smallVector3.getValue(0))
        self.assertEqual(6, smallVector3.getValue(2))
        smallVector3 = self.smallVector1.skipVector(3, 1)
        self.assertEqual(3, smallVector3.getValue(0))
        self.assertEqual(6, smallVector3.getValue(1))

    def test_SkipVectorLarge(self):
        largeVector3 = self.largeVector1.skipVector(2, 0)
        self.assertEqual(250000, largeVector3.sumOfElements())
        largeVector3 = self.largeVector1.skipVector(5, 3)
        self.assertEqual(100300, largeVector3.sumOfElements())

    def test_VectorAddSmall(self):
        self.smallVector1.addVector(self.smallVector2)
        self.assertEqual(50, self.smallVector1.sumOfElements())

    def test_VectorAddLarge(self):
        self.largeVector1.addVector(self.largeVector2)
        self.assertEqual(1001000, self.largeVector1.sumOfElements())

    def test_SubtractSmall(self):
        self.smallVector1.subtract(self.smallVector2)
        self.assertEqual(-10, self.smallVector1.sumOfElements())

    def test_SubtractLarge(self):
        self.largeVector1.subtract(self.largeVector2)
        self.assertEqual(0, self.largeVector1.sumOfElements())

    def test_DifferenceSmall(self):
        smallVector3 = self.smallVector1.difference(self.smallVector2)
        self.assertEqual(-10, smallVector3.sumOfElements())

    def test_DifferenceLarge(self):
        largeVector3 = self.largeVector1.difference(self.largeVector2)
        self.assertEqual(0, largeVector3.sumOfElements())

    def test_DotProductWithVectorSmall(self):
        dotProduct = self.smallVector1.dotProduct(self.smallVector2)
        self.assertEqual(110, dotProduct)

    def test_DotProductWithVectorLarge(self):
        dotProduct = self.largeVector1.dotProduct(self.largeVector2)
        self.assertEqual(167167000, dotProduct)

    def test_DotProductWithItselfSmall(self):
        dotProduct = self.smallVector1.dotProductWithSelf()
        self.assertEqual(90, dotProduct)

    def test_DotProductWithItselfLarge(self):
        dotProduct = self.largeVector1.dotProductWithSelf()
        self.assertEqual(333833500, dotProduct)

    def test_ElementProductSmall(self):
        smallVector3 = self.smallVector1.elementProduct(self.smallVector2)
        self.assertEqual(110, smallVector3.sumOfElements())

    def test_ElementProductLarge(self):
        largeVector3 = self.largeVector1.elementProduct(self.largeVector2)
        self.assertEqual(167167000, largeVector3.sumOfElements())

    def test_Divide(self):
        self.smallVector1.divide(10.0)
        # Tenths are not exactly representable in binary floating point, so compare approximately.
        self.assertAlmostEqual(2, self.smallVector1.sumOfElements())

    def test_Multiply(self):
        self.smallVector1.multiply(10.0)
        self.assertEqual(200, self.smallVector1.sumOfElements())

    def test_Product(self):
        smallVector3 = self.smallVector1.product(7.0)
        self.assertEqual(140, smallVector3.sumOfElements())

    def test_L1NormalizeSmall(self):
        self.smallVector1.l1Normalize()
        # A sum of normalized floats may differ from 1.0 by rounding error.
        self.assertAlmostEqual(1.0, self.smallVector1.sumOfElements())

    def test_L1NormalizeLarge(self):
        self.largeVector1.l1Normalize()
        self.assertAlmostEqual(1.0, self.largeVector1.sumOfElements())

    def test_L2NormSmall(self):
        norm = self.smallVector1.l2Norm()
        self.assertAlmostEqual(norm, math.sqrt(90))

    def test_L2NormLarge(self):
        norm = self.largeVector1.l2Norm()
        self.assertAlmostEqual(norm, math.sqrt(333833500))

    def test_cosineSimilaritySmall(self):
        similarity = self.smallVector1.cosineSimilarity(self.smallVector2)
        self.assertAlmostEqual(0.8411910, similarity, 6)

    def test_cosineSimilarityLarge(self):
        similarity = self.largeVector1.cosineSimilarity(self.largeVector2)
        self.assertAlmostEqual(0.5007497, similarity, 6)
def test_Sigmoid(self):
    """
    Applies sigmoid() in place to a Vector built from data1 and checks the elements at index 0 and
    index 4 against reference values, to 6 decimal places.
    """
    squashed = Vector(self.data1)
    squashed.sigmoid()
    # (index, expected value after sigmoid)
    expectations = ((0, 0.8807971), (4, 0.9975274))
    for index, expected in expectations:
        self.assertAlmostEqual(expected, squashed.getValue(index), 6)