def train(self, trainSet: InstanceList, parameters: Parameter):
    """
    Training algorithm for the linear discriminant analysis classifier
    (Introduction to Machine Learning, Alpaydin, 2015).

    PARAMETERS
    ----------
    trainSet : InstanceList
        Training data given to the algorithm.
    parameters : Parameter
        Parameter of the Lda algorithm.
    """
    w0 = {}
    w = {}
    priorDistribution = trainSet.classDistribution()
    classLists = Partition(trainSet)
    covariance = Matrix(trainSet.get(0).continuousAttributeSize(),
                        trainSet.get(0).continuousAttributeSize())
    for i in range(classLists.size()):
        averageVector = Vector(classLists.get(i).continuousAverage())
        classCovariance = classLists.get(i).covariance(averageVector)
        classCovariance.multiplyWithConstant(classLists.get(i).size() - 1)
        covariance.add(classCovariance)
    covariance.divideByConstant(trainSet.size() - classLists.size())
    covariance.inverse()
    for i in range(classLists.size()):
        Ci = classLists.get(i).getClassLabel()
        averageVector = Vector(classLists.get(i).continuousAverage())
        wi = covariance.multiplyWithVectorFromRight(averageVector)
        w[Ci] = wi
        w0i = -0.5 * wi.dotProduct(averageVector) + math.log(priorDistribution.getProbability(Ci))
        w0[Ci] = w0i
    self.model = LdaModel(priorDistribution, w, w0)
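
# The loop above computes, for each class Ci, the linear discriminant parameters
# wi = S^-1 * mi and w0i = -0.5 * wi . mi + log P(Ci), where S is the pooled
# within-class covariance (sum of (Ni - 1) * Si divided by N - K) and mi is the class
# mean (Alpaydin, 2015). A minimal sketch of how such parameters could score a new
# feature vector; the helper name below is hypothetical and not part of LdaModel's API.
def lda_score_sketch(x: Vector, w: dict, w0: dict) -> str:
    # Pick the class label Ci that maximizes the linear discriminant wi . x + w0i.
    best_label, best_score = None, None
    for label in w:
        score = w[label].dotProduct(x) + w0[label]
        if best_score is None or score > best_score:
            best_label, best_score = label, score
    return best_label
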
def calculateRMinusY(self, instance: Instance, inputVector: Vector, weights: Matrix) -> Vector:
    """
    The calculateRMinusY method creates a new Vector from the given Instance, then multiplies the given input
    Vector with the given weights Matrix. After normalizing the output, it returns the difference between the
    newly created Vector and the normalized output.

    PARAMETERS
    ----------
    instance : Instance
        Instance is used to get class labels.
    inputVector : Vector
        Vector to multiply weights.
    weights : Matrix
        Matrix of weights.

    RETURNS
    -------
    Vector
        Difference between the newly created Vector and the normalized output.
    """
    r = Vector()
    r.initAllZerosExceptOne(self.K, self.classLabels.index(instance.getClassLabel()), 1.0)
    o = weights.multiplyWithVectorFromRight(inputVector)
    y = self.normalizeOutput(o)
    return r.difference(y)
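
# A small numeric illustration of r - y with plain Python lists (illustrative values,
# not the library's Vector class): with K = 3 classes and the instance's label at
# index 1, r is the one-hot vector (0, 1, 0).
r = [0.0, 1.0, 0.0]    # one-hot encoding of the instance's class label
y = [0.2, 0.5, 0.3]    # a hypothetical normalized (softmax) output
r_minus_y = [ri - yi for ri, yi in zip(r, y)]   # [-0.2, 0.5, -0.3], the error signal used in the weight updates
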
def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: MultiLayerPerceptronParameter):
    """
    The AutoEncoderModel method takes two InstanceLists as inputs: a train set and a validation set. First it
    allocates the weights of the W and V matrices using the given MultiLayerPerceptronParameter and takes clones of
    these matrices as bestW and bestV. Then it gets the epoch count and starts to iterate over the epochs. In each
    epoch it shuffles the train set and tries to find new W and V matrices. At the end it tests the autoencoder with
    the given validation set, and if its performance is better than the previous one, it reassigns the bestW and
    bestV matrices. It continues to iterate until the end of the epochs.

    PARAMETERS
    ----------
    trainSet : InstanceList
        InstanceList to use as the train set.
    validationSet : InstanceList
        InstanceList to use as the validation set.
    parameters : MultiLayerPerceptronParameter
        MultiLayerPerceptronParameter is used to get the parameters.
    """
    super().__init__(trainSet)
    self.K = trainSet.get(0).continuousAttributeSize()
    self.__allocateWeights(parameters.getHiddenNodes(), parameters.getSeed())
    bestW = copy.deepcopy(self.__W)
    bestV = copy.deepcopy(self.__V)
    bestPerformance = Performance(1000000000)
    epoch = parameters.getEpoch()
    learningRate = parameters.getLearningRate()
    for i in range(epoch):
        trainSet.shuffle(parameters.getSeed())
        for j in range(trainSet.size()):
            self.createInputVector(trainSet.get(j))
            self.r = trainSet.get(j).toVector()
            hidden = self.calculateHidden(self.x, self.__W)
            hiddenBiased = hidden.biased()
            self.y = self.__V.multiplyWithVectorFromRight(hiddenBiased)
            rMinusY = self.r.difference(self.y)
            deltaV = Matrix(rMinusY, hiddenBiased)
            oneMinusHidden = self.calculateOneMinusHidden(hidden)
            tmph = self.__V.multiplyWithVectorFromLeft(rMinusY)
            tmph.remove(0)
            tmpHidden = oneMinusHidden.elementProduct(hidden.elementProduct(tmph))
            deltaW = Matrix(tmpHidden, self.x)
            deltaV.multiplyWithConstant(learningRate)
            self.__V.add(deltaV)
            deltaW.multiplyWithConstant(learningRate)
            self.__W.add(deltaW)
        currentPerformance = self.testAutoEncoder(validationSet)
        if currentPerformance.getErrorRate() < bestPerformance.getErrorRate():
            bestPerformance = currentPerformance
            bestW = copy.deepcopy(self.__W)
            bestV = copy.deepcopy(self.__V)
    self.__W = bestW
    self.__V = bestV
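
# The inner loop above is one stochastic gradient step of a single-hidden-layer
# autoencoder: h = sigmoid(W x) and y = V [1; h]. With the reconstruction error r - y,
# the outer products deltaV = Matrix(rMinusY, hiddenBiased) and
# deltaW = Matrix(tmpHidden, self.x) correspond to
#     deltaV = learningRate * (r - y) [1; h]^T
#     deltaW = learningRate * (h * (1 - h) * (V^T (r - y) with the bias row dropped)) x^T,
# where the bias row is dropped by tmph.remove(0) before the element-wise products.
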
def calculateTransitionProbabilities(self, observations: list):
    """
    calculateTransitionProbabilities calculates the transition probabilities matrix from each state to another
    state. For each observation and for each transition in each observation, the function gets the states.
    Normalizing the counts of the pairs of states gives the transition probabilities.

    PARAMETERS
    ----------
    observations : list
        A set of observations used to calculate the transition probabilities.
    """
    self.transitionProbabilities = Matrix(self.stateCount, self.stateCount)
    for current in observations:
        for j in range(len(current) - 1):
            fromIndex = self.stateIndexes[current[j]]
            toIndex = self.stateIndexes[current[j + 1]]
            self.transitionProbabilities.increment(fromIndex, toIndex)
    self.transitionProbabilities.columnWiseNormalize()
def calculatePi(self, observations: list):
    """
    calculatePi calculates the prior probability matrix (initial probabilities for each state combination) from a
    set of observations. For each observation, the function extracts the first and second states in that
    observation. Normalizing the counts of the pairs of states gives the prior probabilities for each pair of
    states.

    PARAMETERS
    ----------
    observations : list
        A set of observations used to calculate the prior probabilities.
    """
    self.__pi = Matrix(self.stateCount, self.stateCount)
    for observation in observations:
        first = self.stateIndexes[observation[0]]
        second = self.stateIndexes[observation[1]]
        self.__pi.increment(first, second)
    self.__pi.columnWiseNormalize()
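
# A minimal illustration of the expected input, assuming two states 'a' and 'b'
# (hypothetical data): each observation is a sequence of state labels, and
# calculatePi counts the (first, second) state pair of every observation before
# column-wise normalization turns the counts into probabilities.
observations = [['a', 'b', 'a'], ['a', 'a', 'b'], ['b', 'a', 'a']]
# Pairs counted here: ('a','b'), ('a','a'), ('b','a') -> one increment each in __pi.
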
def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: LinearPerceptronParameter):
    """
    Constructor that takes InstanceLists as the trainSet and the validationSet. Initially it allocates the layer
    weights, then creates an input vector by using the given trainSet and finds the error. Via the validationSet it
    finds the classification performance, and at the end it reassigns the allocated weight Matrix with the matrix
    that has the best accuracy.

    PARAMETERS
    ----------
    trainSet : InstanceList
        InstanceList that is used to train.
    validationSet : InstanceList
        InstanceList that is used to validate.
    parameters : LinearPerceptronParameter
        Linear perceptron parameters; learningRate, etaDecrease, crossValidationRatio, epoch.
    """
    super().__init__(trainSet)
    self.W = self.allocateLayerWeights(self.K, self.d + 1, parameters.getSeed())
    bestW = copy.deepcopy(self.W)
    bestClassificationPerformance = ClassificationPerformance(0.0)
    epoch = parameters.getEpoch()
    learningRate = parameters.getLearningRate()
    for i in range(epoch):
        trainSet.shuffle(parameters.getSeed())
        for j in range(trainSet.size()):
            self.createInputVector(trainSet.get(j))
            rMinusY = self.calculateRMinusY(trainSet.get(j), self.x, self.W)
            deltaW = Matrix(rMinusY, self.x)
            deltaW.multiplyWithConstant(learningRate)
            self.W.add(deltaW)
        currentClassificationPerformance = self.testClassifier(validationSet)
        if currentClassificationPerformance.getAccuracy() > bestClassificationPerformance.getAccuracy():
            bestClassificationPerformance = currentClassificationPerformance
            bestW = copy.deepcopy(self.W)
        learningRate *= parameters.getEtaDecrease()
    self.W = bestW
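
# Each pass above applies the linear-perceptron update
#     W <- W + learningRate * (r - y) x^T,
# where r is the one-hot class vector, y is the normalized output for input x, and
# Matrix(rMinusY, self.x) is exactly that outer product. The learning rate is
# multiplied by etaDecrease after every epoch, and the weights with the best
# validation accuracy are kept.
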
def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: MultiLayerPerceptronParameter):
    """
    A constructor that takes InstanceLists as the trainSet and the validationSet. It sets the NeuralNetworkModel
    nodes with the given InstanceList, then creates an input vector by using the given trainSet and finds the
    error. Via the validationSet it finds the classification performance and reassigns the allocated weight Matrix
    W with the matrix that has the best accuracy, together with the corresponding Matrix V.

    PARAMETERS
    ----------
    trainSet : InstanceList
        InstanceList that is used to train.
    validationSet : InstanceList
        InstanceList that is used to validate.
    parameters : MultiLayerPerceptronParameter
        Multi layer perceptron parameters; seed, learningRate, etaDecrease, crossValidationRatio, epoch,
        hiddenNodes.
    """
    super().initWithTrainSet(trainSet)
    self.__allocateWeights(parameters.getHiddenNodes(), parameters.getSeed())
    bestW = copy.deepcopy(self.W)
    bestV = copy.deepcopy(self.__V)
    bestClassificationPerformance = ClassificationPerformance(0.0)
    epoch = parameters.getEpoch()
    learningRate = parameters.getLearningRate()
    for i in range(epoch):
        trainSet.shuffle(parameters.getSeed())
        for j in range(trainSet.size()):
            self.createInputVector(trainSet.get(j))
            hidden = self.calculateHidden(self.x, self.W)
            hiddenBiased = hidden.biased()
            rMinusY = self.calculateRMinusY(trainSet.get(j), hiddenBiased, self.__V)
            deltaV = Matrix(rMinusY, hiddenBiased)
            oneMinusHidden = self.calculateOneMinusHidden(hidden)
            tmph = self.__V.multiplyWithVectorFromLeft(rMinusY)
            tmph.remove(0)
            tmpHidden = oneMinusHidden.elementProduct(hidden.elementProduct(tmph))
            deltaW = Matrix(tmpHidden, self.x)
            deltaV.multiplyWithConstant(learningRate)
            self.__V.add(deltaV)
            deltaW.multiplyWithConstant(learningRate)
            self.W.add(deltaW)
        currentClassificationPerformance = self.testClassifier(validationSet)
        if currentClassificationPerformance.getAccuracy() > bestClassificationPerformance.getAccuracy():
            bestClassificationPerformance = currentClassificationPerformance
            bestW = copy.deepcopy(self.W)
            bestV = copy.deepcopy(self.__V)
        learningRate *= parameters.getEtaDecrease()
    self.W = bestW
    self.__V = bestV
def calculateForwardSingleHiddenLayer(self, W: Matrix, V: Matrix):
    """
    The calculateForwardSingleHiddenLayer method takes two matrices W and V. First it multiplies W with x, then
    multiplies V with the result of the previous multiplication.

    PARAMETERS
    ----------
    W : Matrix
        Matrix to multiply with x.
    V : Matrix
        Matrix to multiply with the biased hidden layer output.
    """
    hidden = self.calculateHidden(self.x, W)
    hiddenBiased = hidden.biased()
    self.y = V.multiplyWithVectorFromRight(hiddenBiased)
def calculateHidden(self, input: Vector, weights: Matrix) -> Vector:
    """
    The calculateHidden method takes a {@link Vector} input and {@link Matrix} weights. It multiplies the weights
    Matrix with the given input Vector, then applies the sigmoid function and returns the result.

    PARAMETERS
    ----------
    input : Vector
        Vector to multiply weights.
    weights : Matrix
        Matrix that is multiplied with the input Vector.

    RETURNS
    -------
    Vector
        Result of the sigmoid function.
    """
    z = weights.multiplyWithVectorFromRight(input)
    z.sigmoid()
    return z
def allocateLayerWeights(self, row: int, column: int, seed: int) -> Matrix:
    """
    The allocateLayerWeights method returns a new Matrix with random weights.

    PARAMETERS
    ----------
    row : int
        Number of rows.
    column : int
        Number of columns.
    seed : int
        Seed for initialization of random function.

    RETURNS
    -------
    Matrix
        Matrix with random weights.
    """
    matrix = Matrix(row, column, -0.01, +0.01, seed)
    return matrix
class Hmm1(Hmm):

    __pi: Vector

    def __init__(self, states: set, observations: list, emittedSymbols: list):
        """
        A constructor of the Hmm1 class which takes a set of states, an array of observations (each of which
        consists of an array of states) and an array of instances (each of which consists of an array of emitted
        symbols). The constructor calls its super method to calculate the emission probabilities for those states.

        PARAMETERS
        ----------
        states : set
            A Set of states, consisting of all possible states for this problem.
        observations : list
            An array of instances, where each instance consists of an array of states.
        emittedSymbols : list
            An array of instances, where each instance consists of an array of symbols.
        """
        super().__init__(states, observations, emittedSymbols)

    def calculatePi(self, observations: list):
        """
        calculatePi calculates the prior probability vector (initial probabilities for each state) from a set of
        observations. For each observation, the function extracts the first state in that observation. Normalizing
        the counts of the states gives the prior probabilities for each state.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the prior probabilities.
        """
        self.__pi = Vector()
        self.__pi.initAllSame(self.stateCount, 0.0)
        for observation in observations:
            index = self.stateIndexes[observation[0]]
            self.__pi.addValue(index, 1.0)
        self.__pi.l1Normalize()

    def calculateTransitionProbabilities(self, observations: list):
        """
        calculateTransitionProbabilities calculates the transition probabilities matrix from each state to another
        state. For each observation and for each transition in each observation, the function gets the states.
        Normalizing the counts of the pairs of states gives the transition probabilities.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the transition probabilities.
        """
        self.transitionProbabilities = Matrix(self.stateCount, self.stateCount)
        for current in observations:
            for j in range(len(current) - 1):
                fromIndex = self.stateIndexes[current[j]]
                toIndex = self.stateIndexes[current[j + 1]]
                self.transitionProbabilities.increment(fromIndex, toIndex)
        self.transitionProbabilities.columnWiseNormalize()

    def __logOfColumn(self, column: int) -> Vector:
        """
        logOfColumn calculates the logarithm of each value in a specific column of the transition probability
        matrix.

        PARAMETERS
        ----------
        column : int
            Column index of the transition probability matrix.

        RETURNS
        -------
        Vector
            A vector consisting of the logarithm of each value in the given column of the transition probability
            matrix.
        """
        result = Vector()
        for i in range(self.stateCount):
            result.add(self.safeLog(self.transitionProbabilities.getValue(i, column)))
        return result

    def viterbi(self, s: list) -> list:
        """
        viterbi calculates the most probable state sequence for a set of observed symbols.

        PARAMETERS
        ----------
        s : list
            A set of observed symbols.

        RETURNS
        -------
        list
            The most probable state sequence as an {@link ArrayList}.
        """
        result = []
        sequenceLength = len(s)
        gamma = Matrix(sequenceLength, self.stateCount)
        phi = Matrix(sequenceLength, self.stateCount)
        qs = Vector(sequenceLength, 0)
        emission = s[0]
        for i in range(self.stateCount):
            observationLikelihood = self.states[i].getEmitProb(emission)
            gamma.setValue(0, i, self.safeLog(self.__pi.getValue(i)) + self.safeLog(observationLikelihood))
        for t in range(1, sequenceLength):
            emission = s[t]
            for j in range(self.stateCount):
                tempArray = self.__logOfColumn(j)
                tempArray.addVector(gamma.getRowVector(t - 1))
                maxIndex = tempArray.maxIndex()
                observationLikelihood = self.states[j].getEmitProb(emission)
                gamma.setValue(t, j, tempArray.getValue(maxIndex) + self.safeLog(observationLikelihood))
                phi.setValue(t, j, maxIndex)
        qs.setValue(sequenceLength - 1, gamma.getRowVector(sequenceLength - 1).maxIndex())
        result.insert(0, self.states[int(qs.getValue(sequenceLength - 1))].getState())
        for i in range(sequenceLength - 2, -1, -1):
            qs.setValue(i, phi.getValue(i + 1, int(qs.getValue(i + 1))))
            result.insert(0, self.states[int(qs.getValue(i))].getState())
        return result
def __init__(self, trainSet: InstanceList, validationSet: InstanceList, parameters: DeepNetworkParameter):
    """
    Constructor that takes two InstanceLists (a train set and a validation set) and a DeepNetworkParameter as
    inputs. First it sets the class labels of the given train set, their count as K and the number of continuous
    attributes as d, then allocates the weights and sets the best weights. At each epoch, it shuffles the train set
    and loops through each item of that train set: it multiplies the weights Matrix with the input Vector, then
    applies the sigmoid function and stores the result as the hidden layer with a bias added. Then it updates the
    weights and compares the performance of these weights on the validation set, updating
    bestClassificationPerformance and bestWeights according to the current situation. At the end of each epoch it
    updates the learning rate via the etaDecrease value, and it finishes by restoring the best weights.

    PARAMETERS
    ----------
    trainSet : InstanceList
        InstanceList to be used as the trainSet.
    validationSet : InstanceList
        InstanceList to be used as the validationSet.
    parameters : DeepNetworkParameter
        DeepNetworkParameter input.
    """
    super().__init__(trainSet)
    deltaWeights = []
    hidden = []
    hiddenBiased = []
    self.__allocateWeights(parameters)
    bestWeights = self.__setBestWeights()
    bestClassificationPerformance = ClassificationPerformance(0.0)
    epoch = parameters.getEpoch()
    learningRate = parameters.getLearningRate()
    for i in range(epoch):
        trainSet.shuffle(parameters.getSeed())
        for j in range(trainSet.size()):
            self.createInputVector(trainSet.get(j))
            hidden.clear()
            hiddenBiased.clear()
            deltaWeights.clear()
            for k in range(self.__hiddenLayerSize):
                if k == 0:
                    hidden.append(self.calculateHidden(self.x, self.__weights[k]))
                else:
                    hidden.append(self.calculateHidden(hiddenBiased[k - 1], self.__weights[k]))
                hiddenBiased.append(hidden[k].biased())
            rMinusY = self.calculateRMinusY(trainSet.get(j), hiddenBiased[self.__hiddenLayerSize - 1],
                                            self.__weights[len(self.__weights) - 1])
            deltaWeights.insert(0, Matrix(rMinusY, hiddenBiased[self.__hiddenLayerSize - 1]))
            for k in range(len(self.__weights) - 2, -1, -1):
                oneMinusHidden = self.calculateOneMinusHidden(hidden[k])
                tmph = deltaWeights[0].elementProduct(self.__weights[k + 1]).sumOfRows()
                tmph.remove(0)
                tmpHidden = oneMinusHidden.elementProduct(tmph)
                if k == 0:
                    deltaWeights.insert(0, Matrix(tmpHidden, self.x))
                else:
                    deltaWeights.insert(0, Matrix(tmpHidden, hiddenBiased[k - 1]))
            for k in range(len(self.__weights)):
                deltaWeights[k].multiplyWithConstant(learningRate)
                self.__weights[k].add(deltaWeights[k])
        currentClassificationPerformance = self.testClassifier(validationSet)
        if currentClassificationPerformance.getAccuracy() > bestClassificationPerformance.getAccuracy():
            bestClassificationPerformance = currentClassificationPerformance
            bestWeights = self.__setBestWeights()
        learningRate *= parameters.getEtaDecrease()
    self.__weights.clear()
    for m in bestWeights:
        self.__weights.append(m)
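
# The backward pass above works from the output layer inward: the output delta is
# r - y; each earlier layer's delta is obtained by pushing the next delta back through
# the next weight matrix (elementProduct followed by sumOfRows), dropping the bias
# component with remove(0), and multiplying element-wise by h * (1 - h). Every weight
# matrix then receives learningRate times the outer product of its delta with the
# (biased) activation vector feeding it, which is what the deltaWeights list, filled
# back-to-front, holds.
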
def __linearRegressionOnCountsOfCounts(self, countsOfCounts: list) -> list:
    """
    Given counts of counts, this function calculates the estimated counts of counts c* with Good-Turing smoothing.
    First, the algorithm filters the non-zero counts from the counts of counts array and constructs the c and r
    arrays. Then it constructs the Z_n array with Z_n = 2 * C_n / (r_{n+1} - r_{n-1}). The algorithm then uses
    simple linear regression on the Z_n values to estimate w_1 and w_0, where log(N[i]) = w_1 * log(i) + w_0.

    PARAMETERS
    ----------
    countsOfCounts : list
        Counts of counts. countsOfCounts[1] is the number of words that occurred once in the corpus,
        countsOfCounts[i] is the number of words that occurred i times in the corpus.

    RETURNS
    -------
    list
        Estimated counts of counts array. N[1] is the estimated count for out-of-vocabulary words.
    """
    N = [0.0] * len(countsOfCounts)
    r = []
    c = []
    for i in range(1, len(countsOfCounts)):
        if countsOfCounts[i] != 0:
            r.append(i)
            c.append(countsOfCounts[i])
    A = Matrix(2, 2)
    y = Vector(2, 0)
    for i in range(len(r)):
        xt = math.log(r[i])
        if i == 0:
            rt = math.log(c[i])
        else:
            if i == len(r) - 1:
                rt = math.log((1.0 * c[i]) / (r[i] - r[i - 1]))
            else:
                rt = math.log((2.0 * c[i]) / (r[i + 1] - r[i - 1]))
        A.addValue(0, 0, 1.0)
        A.addValue(0, 1, xt)
        A.addValue(1, 0, xt)
        A.addValue(1, 1, xt * xt)
        y.addValue(0, rt)
        y.addValue(1, rt * xt)
    A.inverse()
    w = A.multiplyWithVectorFromRight(y)
    w0 = w.getValue(0)
    w1 = w.getValue(1)
    for i in range(1, len(countsOfCounts)):
        N[i] = math.exp(math.log(i) * w1 + w0)
    return N
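
# The 2x2 system accumulated above is the least-squares normal equations for the line
# log(Z_r) = w0 + w1 * log(r): with x = log(r) and z = log(Z_r),
#     A = [[n, sum(x)], [sum(x), sum(x^2)]]  and  y = [sum(z), sum(x * z)],
# so w = A^-1 y gives the intercept w0 and slope w1. The smoothed counts are then
#     N[i] = exp(w1 * log(i) + w0) = exp(w0) * i^w1.
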
class NeuralNetwork:

    __wordVectors: Matrix
    __wordVectorUpdate: Matrix
    __vocabulary: Vocabulary
    __parameter: WordToVecParameter
    __corpus: Corpus
    __expTable: list

    EXP_TABLE_SIZE = 1000
    MAX_EXP = 6

    def __init__(self, corpus: Corpus, parameter: WordToVecParameter):
        """
        Constructor for the NeuralNetwork class. Gets the corpus and network parameters as input and sets the
        corresponding parameters first. After that, initializes the network with random weights between -0.5 and
        0.5. Constructs the vector update matrix and prepares the exp table.

        PARAMETERS
        ----------
        corpus : Corpus
            Corpus used to train word vectors using the Word2Vec algorithm.
        parameter : WordToVecParameter
            Parameters of the Word2Vec algorithm.
        """
        self.__vocabulary = Vocabulary(corpus)
        self.__parameter = parameter
        self.__corpus = corpus
        self.__wordVectors = Matrix(self.__vocabulary.size(), self.__parameter.getLayerSize(), -0.5, 0.5)
        self.__wordVectorUpdate = Matrix(self.__vocabulary.size(), self.__parameter.getLayerSize())
        self.__prepareExpTable()

    def __prepareExpTable(self):
        """
        Constructs the fast exponentiation table. Instead of computing the exponential each time, the algorithm
        looks it up in this table.
        """
        self.__expTable = [0.0] * (NeuralNetwork.EXP_TABLE_SIZE + 1)
        for i in range(NeuralNetwork.EXP_TABLE_SIZE):
            self.__expTable[i] = math.exp((i / (NeuralNetwork.EXP_TABLE_SIZE + 0.0) * 2 - 1) * NeuralNetwork.MAX_EXP)
            self.__expTable[i] = self.__expTable[i] / (self.__expTable[i] + 1)

    def train(self) -> VectorizedDictionary:
        """
        Main method for training the Word2Vec algorithm. Depending on the training parameter, the CBow or the
        SkipGram algorithm is applied.

        RETURNS
        -------
        VectorizedDictionary
            Dictionary of word vectors.
        """
        result = VectorizedDictionary()
        if self.__parameter.isCbow():
            self.__trainCbow()
        else:
            self.__trainSkipGram()
        for i in range(self.__vocabulary.size()):
            result.addWord(VectorizedWord(self.__vocabulary.getWord(i).getName(),
                                          self.__wordVectors.getRowVector(i)))
        return result

    def __calculateG(self, f: float, alpha: float, label: float) -> float:
        """
        Calculates the G value in the Word2Vec algorithm.

        PARAMETERS
        ----------
        f : float
            F value.
        alpha : float
            Learning rate alpha.
        label : float
            Label of the instance.

        RETURNS
        -------
        float
            Calculated G value.
        """
        if f > NeuralNetwork.MAX_EXP:
            return (label - 1) * alpha
        elif f < -NeuralNetwork.MAX_EXP:
            return label * alpha
        else:
            return (label - self.__expTable[int((f + NeuralNetwork.MAX_EXP) *
                                                (NeuralNetwork.EXP_TABLE_SIZE // NeuralNetwork.MAX_EXP // 2))]) * alpha

    def __trainCbow(self):
        """
        Main method for training the CBow version of the Word2Vec algorithm.
""" iteration = Iteration(self.__corpus, self.__parameter) currentSentence = self.__corpus.getSentence( iteration.getSentenceIndex()) outputs = Vector() outputs.initAllSame(self.__parameter.getLayerSize(), 0.0) outputUpdate = Vector() outputUpdate.initAllSame(self.__parameter.getLayerSize(), 0) self.__corpus.shuffleSentences(1) while iteration.getIterationCount( ) < self.__parameter.getNumberOfIterations(): iteration.alphaUpdate() wordIndex = self.__vocabulary.getPosition( currentSentence.getWord(iteration.getSentencePosition())) currentWord = self.__vocabulary.getWord(wordIndex) outputs.clear() outputUpdate.clear() b = randrange(self.__parameter.getWindow()) cw = 0 for a in range(b, self.__parameter.getWindow() * 2 + 1 - b): c = iteration.getSentencePosition( ) - self.__parameter.getWindow() + a if a != self.__parameter.getWindow( ) and currentSentence.safeIndex(c): lastWordIndex = self.__vocabulary.getPosition( currentSentence.getWord(c)) outputs.addVector( self.__wordVectors.getRowVector(lastWordIndex)) cw = cw + 1 if cw > 0: outputs.divide(cw) if self.__parameter.isHierarchicalSoftMax(): for d in range(currentWord.getCodeLength()): l2 = currentWord.getPoint(d) f = outputs.dotProduct( self.__wordVectorUpdate.getRowVector(l2)) if f <= -NeuralNetwork.MAX_EXP or f >= NeuralNetwork.MAX_EXP: continue else: f = self.__expTable[int( (f + NeuralNetwork.MAX_EXP) * (NeuralNetwork.EXP_TABLE_SIZE // NeuralNetwork.MAX_EXP // 2))] g = (1 - currentWord.getCode(d) - f) * iteration.getAlpha() outputUpdate.addVector( self.__wordVectorUpdate.getRowVector(l2).product( g)) self.__wordVectorUpdate.addRowVector( l2, outputs.product(g)) else: for d in range(self.__parameter.getNegativeSamplingSize() + 1): if d == 0: target = wordIndex label = 1 else: target = self.__vocabulary.getTableValue( randrange(self.__vocabulary.getTableSize())) if target == 0: target = randrange(self.__vocabulary.size() - 1) + 1 if target == wordIndex: continue label = 0 l2 = target f = outputs.dotProduct( self.__wordVectorUpdate.getRowVector(l2)) g = self.__calculateG(f, iteration.getAlpha(), label) outputUpdate.addVector( self.__wordVectorUpdate.getRowVector(l2).product( g)) self.__wordVectorUpdate.addRowVector( l2, outputs.product(g)) for a in range(b, self.__parameter.getWindow() * 2 + 1 - b): c = iteration.getSentencePosition( ) - self.__parameter.getWindow() + a if a != self.__parameter.getWindow( ) and currentSentence.safeIndex(c): lastWordIndex = self.__vocabulary.getPosition( currentSentence.getWord(c)) self.__wordVectors.addRowVector( lastWordIndex, outputUpdate) currentSentence = iteration.sentenceUpdate(currentSentence) def __trainSkipGram(self): """ Main method for training the SkipGram version of Word2Vec algorithm. 
""" iteration = Iteration(self.__corpus, self.__parameter) currentSentence = self.__corpus.getSentence( iteration.getSentenceIndex()) outputs = Vector() outputs.initAllSame(self.__parameter.getLayerSize(), 0.0) outputUpdate = Vector() outputUpdate.initAllSame(self.__parameter.getLayerSize(), 0) self.__corpus.shuffleSentences(1) while iteration.getIterationCount( ) < self.__parameter.getNumberOfIterations(): iteration.alphaUpdate() wordIndex = self.__vocabulary.getPosition( currentSentence.getWord(iteration.getSentencePosition())) currentWord = self.__vocabulary.getWord(wordIndex) outputs.clear() outputUpdate.clear() b = randrange(self.__parameter.getWindow()) for a in range(b, self.__parameter.getWindow() * 2 + 1 - b): c = iteration.getSentencePosition( ) - self.__parameter.getWindow() + a if a != self.__parameter.getWindow( ) and currentSentence.safeIndex(c): lastWordIndex = self.__vocabulary.getPosition( currentSentence.getWord(c)) l1 = lastWordIndex outputUpdate.clear() if self.__parameter.isHierarchicalSoftMax(): for d in range(currentWord.getCodeLength()): l2 = currentWord.getPoint(d) f = self.__wordVectors.getRowVector(l1).dotProduct( self.__wordVectorUpdate.getRowVector(l2)) if f <= -NeuralNetwork.MAX_EXP or f >= NeuralNetwork.MAX_EXP: continue else: f = self.__expTable[int( (f + NeuralNetwork.MAX_EXP) * (NeuralNetwork.EXP_TABLE_SIZE // NeuralNetwork.MAX_EXP // 2))] g = (1 - currentWord.getCode(d) - f) * iteration.getAlpha() outputUpdate.addVector( self.__wordVectorUpdate.getRowVector( l2).product(g)) self.__wordVectorUpdate.addRowVector( l2, self.__wordVectors.getRowVector(l1).product(g)) else: for d in range( self.__parameter.getNegativeSamplingSize() + 1): if d == 0: target = wordIndex label = 1 else: target = self.__vocabulary.getTableValue( randrange( self.__vocabulary.getTableSize())) if target == 0: target = randrange( self.__vocabulary.size() - 1) + 1 if target == wordIndex: continue label = 0 l2 = target f = self.__wordVectors.getRowVector(l1).dotProduct( self.__wordVectorUpdate.getRowVector(l2)) g = self.__calculateG(f, iteration.getAlpha(), label) outputUpdate.addVector( self.__wordVectorUpdate.getRowVector( l2).product(g)) self.__wordVectorUpdate.addRowVector( l2, self.__wordVectors.getRowVector(l1).product(g)) self.__wordVectors.addRowVector(l1, outputUpdate) currentSentence = iteration.sentenceUpdate(currentSentence)
def viterbi(self, s: list) -> list:
    """
    viterbi calculates the most probable state sequence for a set of observed symbols.

    PARAMETERS
    ----------
    s : list
        A set of observed symbols.

    RETURNS
    -------
    list
        The most probable state sequence as an {@link ArrayList}.
    """
    result = []
    sequenceLength = len(s)
    gamma = Matrix(sequenceLength, self.stateCount * self.stateCount)
    phi = Matrix(sequenceLength, self.stateCount * self.stateCount)
    qs = Vector(sequenceLength, 0)
    emission1 = s[0]
    emission2 = s[1]
    for i in range(self.stateCount):
        for j in range(self.stateCount):
            observationLikelihood = self.states[i].getEmitProb(emission1) * self.states[j].getEmitProb(emission2)
            gamma.setValue(1, i * self.stateCount + j,
                           self.safeLog(self.__pi.getValue(i, j)) + self.safeLog(observationLikelihood))
    for t in range(2, sequenceLength):
        emission = s[t]
        for j in range(self.stateCount * self.stateCount):
            current = self.__logOfColumn(j)
            previous = gamma.getRowVector(t - 1).skipVector(self.stateCount, j // self.stateCount)
            current.addVector(previous)
            maxIndex = current.maxIndex()
            observationLikelihood = self.states[j % self.stateCount].getEmitProb(emission)
            gamma.setValue(t, j, current.getValue(maxIndex) + self.safeLog(observationLikelihood))
            phi.setValue(t, j, maxIndex * self.stateCount + j // self.stateCount)
    qs.setValue(sequenceLength - 1, gamma.getRowVector(sequenceLength - 1).maxIndex())
    result.insert(0, self.states[int(qs.getValue(sequenceLength - 1)) % self.stateCount].getState())
    for i in range(sequenceLength - 2, 0, -1):
        qs.setValue(i, phi.getValue(i + 1, int(qs.getValue(i + 1))))
        result.insert(0, self.states[int(qs.getValue(i)) % self.stateCount].getState())
    result.insert(0, self.states[int(qs.getValue(1)) // self.stateCount].getState())
    return result
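
# Illustration of the state-pair encoding used above (hypothetical numbers): with
# stateCount = 3, the pair (previous state, current state) = (1, 2) is flattened to
# column index 1 * stateCount + 2 = 5. Conversely, j % stateCount recovers the current
# state and j // stateCount the previous one, which is why the emission probability is
# taken from states[j % stateCount] and why phi stores maxIndex * stateCount +
# j // stateCount so that backtracking can chain pairs.
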
class MatrixTest(unittest.TestCase):

    def setUp(self):
        self.small = Matrix(3, 3)
        for i in range(3):
            for j in range(3):
                self.small.setValue(i, j, 1.0)
        self.v = Vector(3, 1.0)
        self.large = Matrix(1000, 1000)
        for i in range(1000):
            for j in range(1000):
                self.large.setValue(i, j, 1.0)
        self.medium = Matrix(100, 100)
        for i in range(100):
            for j in range(100):
                self.medium.setValue(i, j, 1.0)
        self.V = Vector(1000, 1.0)
        self.vr = Vector(100, 1.0)
        self.random = Matrix(100, 100, 1, 10, 1)
        self.originalSum = self.random.sumOfElements()
        self.identity = Matrix(100)

    def test_ColumnWiseNormalize(self):
        mClone = self.small.clone()
        mClone.columnWiseNormalize()
        self.assertEqual(3, mClone.sumOfElements())
        MClone = self.large.clone()
        MClone.columnWiseNormalize()
        self.assertAlmostEqual(1000, MClone.sumOfElements(), 3)
        self.identity.columnWiseNormalize()
        self.assertEqual(100, self.identity.sumOfElements())

    def test_MultiplyWithConstant(self):
        self.small.multiplyWithConstant(4)
        self.assertEqual(36, self.small.sumOfElements())
        self.small.divideByConstant(4)
        self.large.multiplyWithConstant(1.001)
        self.assertAlmostEqual(1001000, self.large.sumOfElements(), 3)
        self.large.divideByConstant(1.001)
        self.random.multiplyWithConstant(3.6)
        self.assertAlmostEqual(self.originalSum * 3.6, self.random.sumOfElements(), 4)
        self.random.divideByConstant(3.6)

    def test_DivideByConstant(self):
        self.small.divideByConstant(4)
        self.assertEqual(2.25, self.small.sumOfElements())
        self.small.multiplyWithConstant(4)
        self.large.divideByConstant(10)
        self.assertAlmostEqual(100000, self.large.sumOfElements(), 3)
        self.large.multiplyWithConstant(10)
        self.random.divideByConstant(3.6)
        self.assertAlmostEqual(self.originalSum / 3.6, self.random.sumOfElements(), 4)
        self.random.multiplyWithConstant(3.6)

    def test_Add(self):
        self.random.add(self.identity)
        self.assertAlmostEqual(self.originalSum + 100, self.random.sumOfElements(), 4)
        self.random.subtract(self.identity)

    def test_AddVector(self):
        self.large.addRowVector(4, self.V)
        self.assertEqual(1001000, self.large.sumOfElements(), 0.0)
        self.V.multiply(-1.0)
        self.large.addRowVector(4, self.V)
        self.V.multiply(-1.0)

    def test_Subtract(self):
        self.random.subtract(self.identity)
        self.assertAlmostEqual(self.originalSum - 100, self.random.sumOfElements(), 4)
        self.random.add(self.identity)

    def test_MultiplyWithVectorFromLeft(self):
        result = self.small.multiplyWithVectorFromLeft(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromLeft(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromLeft(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_MultiplyWithVectorFromRight(self):
        result = self.small.multiplyWithVectorFromRight(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromRight(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromRight(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_ColumnSum(self):
        self.assertEqual(3, self.small.columnSum(randrange(3)))
        self.assertEqual(1000, self.large.columnSum(randrange(1000)))
        self.assertEqual(1, self.identity.columnSum(randrange(100)))

    def test_SumOfRows(self):
        self.assertEqual(9, self.small.sumOfRows().sumOfElements())
        self.assertEqual(1000000, self.large.sumOfRows().sumOfElements())
        self.assertEqual(100, self.identity.sumOfRows().sumOfElements())
        self.assertAlmostEqual(self.originalSum, self.random.sumOfRows().sumOfElements(), 3)

    def test_RowSum(self):
        self.assertEqual(3, self.small.rowSum(randrange(3)))
        self.assertEqual(1000, self.large.rowSum(randrange(1000)))
        self.assertEqual(1, self.identity.rowSum(randrange(100)))

    def test_Multiply(self):
        result = self.small.multiply(self.small)
        self.assertEqual(27, result.sumOfElements())
        result = self.medium.multiply(self.medium)
        self.assertEqual(1000000.0, result.sumOfElements())
        result = self.random.multiply(self.identity)
        self.assertEqual(self.originalSum, result.sumOfElements())
        result = self.identity.multiply(self.random)
        self.assertEqual(self.originalSum, result.sumOfElements())

    def test_ElementProduct(self):
        result = self.small.elementProduct(self.small)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.elementProduct(self.large)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.elementProduct(self.identity)
        self.assertEqual(result.trace(), result.sumOfElements())

    def test_SumOfElements(self):
        self.assertEqual(9, self.small.sumOfElements())
        self.assertEqual(1000000, self.large.sumOfElements())
        self.assertEqual(100, self.identity.sumOfElements())
        self.assertEqual(self.originalSum, self.random.sumOfElements())

    def test_Trace(self):
        self.assertEqual(3, self.small.trace())
        self.assertEqual(1000, self.large.trace())
        self.assertEqual(100, self.identity.trace())

    def test_Transpose(self):
        self.assertEqual(9, self.small.transpose().sumOfElements())
        self.assertEqual(1000000, self.large.transpose().sumOfElements())
        self.assertEqual(100, self.identity.transpose().sumOfElements())
        self.assertAlmostEqual(self.originalSum, self.random.transpose().sumOfElements(), 3)

    def test_IsSymmetric(self):
        self.assertTrue(self.small.isSymmetric())
        self.assertTrue(self.large.isSymmetric())
        self.assertTrue(self.identity.isSymmetric())
        self.assertFalse(self.random.isSymmetric())

    def test_Determinant(self):
        self.assertEqual(0, self.small.determinant())
        self.assertEqual(0, self.large.determinant())
        self.assertEqual(1, self.identity.determinant())

    def test_Inverse(self):
        self.identity.inverse()
        self.assertEqual(100, self.identity.sumOfElements())
        self.random.inverse()
        self.random.inverse()
        self.assertAlmostEqual(self.originalSum, self.random.sumOfElements(), 5)

    def test_Characteristics(self):
        vectors = self.small.characteristics()
        self.assertEqual(2, len(vectors))
        vectors = self.identity.characteristics()
        self.assertEqual(100, len(vectors))
        vectors = self.medium.characteristics()
        self.assertEqual(46, len(vectors))