def constructDictionaryWithNonRareWords(self, level: int, probability: float) -> set:
    """
    Collects the words whose relative frequency at the given N-Gram level is above a probability threshold.

    PARAMETERS
    ----------
    level : int
        N-Gram level used when counting words (1 treats the N-Gram as UniGram, 2 as Bigram, etc.).
    probability : float
        Threshold on a word's share of the total count; a word is kept only when its share is strictly
        greater than this value.

    RETURNS
    -------
    set
        The words regarded as non-rare.
    """
    frequencies = CounterHashMap()
    # The root node is asked to accumulate the word counts for the requested level into the counter.
    self.rootNode.countWords(frequencies, level)
    totalCount = frequencies.sumOfCounts()
    return {
        word
        for word in frequencies.keys()
        if frequencies[word] / totalCount > probability
    }
def calculateEmissionProbabilities(self, state: object, observations: list, emittedSymbols: list) -> dict:
    """
    Estimates the emission probabilities of a single state from aligned state and symbol sequences.

    Every position (i, j) at which observations[i][j] equals the given state contributes one count to the symbol
    emittedSymbols[i][j]. Each symbol's count is then divided by the total number of counted emissions.

    PARAMETERS
    ----------
    state : object
        The state whose emission probabilities are calculated.
    observations : list
        An array of instances, where each instance consists of an array of states.
    emittedSymbols : list
        An array of instances, where each instance consists of an array of symbols; indexed in parallel with
        observations.

    RETURNS
    -------
    dict
        Maps each symbol emitted from the given state to its relative frequency. Empty when the state never
        occurs in observations.
    """
    symbolCounts = CounterHashMap()
    for i, stateSequence in enumerate(observations):
        for j, observedState in enumerate(stateSequence):
            # The symbol is looked up at every position, whether or not the state matches.
            emitted = emittedSymbols[i][j]
            if observedState == state:
                symbolCounts.put(emitted)
    emissionTotal = symbolCounts.sumOfCounts()
    return {symbol: symbolCounts[symbol] / emissionTotal for symbol in symbolCounts}
def test_PutNTimes2(self):
    """
    Checks that sumOfCounts adds up every putNTimes contribution.

    Iteration i adds i + 1 occurrences of a random key, so regardless of which keys are drawn the expected total
    is 1 + 2 + ... + 1000 = 500500.
    """
    counterHashMap = CounterHashMap()
    for i in range(1000):
        counterHashMap.putNTimes(randrange(1000), i + 1)
    # assertEqual replaces the assertEquals alias, which was removed from unittest in Python 3.12.
    self.assertEqual(500500, counterHashMap.sumOfCounts())
def test_SumOfCounts(self):
    """
    Checks that sumOfCounts equals the number of put calls.

    Keys are drawn at random and may repeat; each put call is expected to add one to the total, so 1000 calls
    must sum to 1000.
    """
    counterHashMap = CounterHashMap()
    for _ in range(1000):
        counterHashMap.put(randrange(1000))
    # assertEqual replaces the assertEquals alias, which was removed from unittest in Python 3.12.
    self.assertEqual(1000, counterHashMap.sumOfCounts())