def addBigramToDict(word1, word2):
    """Record ``word2`` as a follower of ``word1`` in ``bigramsDict``.

    Entries are keyed by ``utils.lowerWordOrList(word1)``.  Each entry is a
    pair ``(first spelling seen for the key, list of followers)``; the first
    element is never updated once the entry exists.
    """
    key = utils.lowerWordOrList(word1)

    # First time this key is seen: create the entry with a fresh follower list.
    if key not in bigramsDict:
        bigramsDict[key] = (word1, [word2])
        return

    # Known key: the follower list is mutated in place, so the stored tuple
    # itself does not need to be reassigned.
    _firstSpelling, followers = bigramsDict[key]
    followers.append(word2)
def generateTextUsingMarkovChain(inputMarkov, wordsPerState):
    """Generate a random word sequence by walking a Markov chain stored as JSON.

    The file at ``inputMarkov`` is decoded as JSON and iterated as pairs
    ``[state, transitions]``.  Every transition is indexed as
    ``[word, [numerator, denominator]]``; the denominator is read from the
    first transition of a state only.  The walk starts at
    ``config.startSymbol`` and ends as soon as ``config.startSymbol`` is drawn
    as the next word.

    Args:
        inputMarkov: path of the JSON file holding the chain.
        wordsPerState: 1 when a state is a single word, 2 when a state is a
            pair of words.

    Returns:
        The list of generated words, without the terminating start symbol.

    Raises:
        ValueError: if ``wordsPerState`` is neither 1 nor 2.
        KeyError: if the walk reaches a state that is not in the chain.
    """
    # Fail early with a clear message instead of leaving ``prev`` unbound.
    if wordsPerState == 1:
        prev = config.startSymbol
    elif wordsPerState == 2:
        prev = (config.startSymbol, config.startSymbol)
    else:
        raise ValueError(
            "wordsPerState must be 1 or 2, got %r" % (wordsPerState,))

    # The context manager closes the file even if reading or decoding fails.
    with open(inputMarkov, 'r') as f:
        data = json.load(f)

    # Index the transitions by normalised state; JSON lists are turned into
    # tuples first so that they can be used as dictionary keys.
    markovDict = {}
    for bigram in data:
        state = utils.lowerWordOrList(utils.listToTuple(bigram[0]))
        markovDict[state] = bigram[1]

    words = []
    while True:
        transitions = markovDict[utils.lowerWordOrList(prev)]
        denominator = transitions[0][1][1]
        rnd = random.randint(1, denominator)

        # Pick the first transition whose cumulative numerator reaches rnd.
        # NOTE(review): nextWord stays None if the numerators sum to less
        # than the denominator -- presumably the input never does that.
        total = 0
        nextWord = None
        for transition in transitions:
            total += transition[1][0]
            if total >= rnd:
                nextWord = transition[0]
                break

        # Drawing the start symbol again marks the end of the text.
        if nextWord == config.startSymbol:
            break

        words.append(nextWord)

        if wordsPerState == 1:
            prev = nextWord
        else:
            # Slide the two-word window forward by one word.
            prev = (prev[1], nextWord)

    return words
def decodeWordToBitsRange(word, previousWord, markovChainDict, maxDigits, bitsRange):
    """Return the bits range associated with ``word`` after ``previousWord``.

    Args:
        word: the word to look up; compared through ``utils.lowerWord``.
        previousWord: state preceding ``word``; normalised with
            ``utils.lowerWordOrList`` before being used as a key.
        markovChainDict: mapping from normalised state to an entry whose
            element ``[1]`` holds the word probabilities for that state.
        maxDigits: forwarded unchanged to ``utils.computeWordRanges``.
        bitsRange: forwarded unchanged to ``utils.computeWordRanges``.

    Returns:
        Element ``[1]`` of the first computed word range whose element ``[0]``
        equals ``word`` after lower-casing.

    Raises:
        KeyError: if ``previousWord`` is not a key of ``markovChainDict``.
        IndexError: if none of the computed ranges matches ``word``.
    """
    # get probabilities for the words that can follow previousWord
    wordProbs = markovChainDict[utils.lowerWordOrList(previousWord)][1]

    # get the range covered by every word
    wordRanges = utils.computeWordRanges(bitsRange, wordProbs, maxDigits)

    # Build a real list: on Python 3 ``filter`` returns an iterator, which
    # cannot be subscripted.  The target is normalised once, outside the loop.
    target = utils.lowerWord(word)
    matches = [wr for wr in wordRanges if utils.lowerWord(wr[0]) == target]

    return matches[0][1]
def generateTextUsingMarkovChain(inputMarkov, wordsPerState):
    """Generate a random word sequence by walking a Markov chain stored as JSON.

    The file at ``inputMarkov`` is decoded as JSON and iterated as pairs
    ``[state, transitions]``.  Every transition is indexed as
    ``[word, [numerator, denominator]]``; the denominator is read from the
    first transition of a state only.  The walk starts at
    ``config.startSymbol`` and ends as soon as ``config.startSymbol`` is drawn
    as the next word.

    Args:
        inputMarkov: path of the JSON file holding the chain.
        wordsPerState: 1 when a state is a single word, 2 when a state is a
            pair of words.

    Returns:
        The list of generated words, without the terminating start symbol.

    Raises:
        ValueError: if ``wordsPerState`` is neither 1 nor 2.
        KeyError: if the walk reaches a state that is not in the chain.
    """
    # Fail early with a clear message instead of leaving ``prev`` unbound.
    if wordsPerState == 1:
        prev = config.startSymbol
    elif wordsPerState == 2:
        prev = (config.startSymbol, config.startSymbol)
    else:
        raise ValueError(
            "wordsPerState must be 1 or 2, got %r" % (wordsPerState,))

    # The context manager closes the file even if reading or decoding fails.
    with open(inputMarkov, 'r') as f:
        data = json.load(f)

    # Index the transitions by normalised state; JSON lists are turned into
    # tuples first so that they can be used as dictionary keys.
    markovDict = {}
    for bigram in data:
        state = utils.lowerWordOrList(utils.listToTuple(bigram[0]))
        markovDict[state] = bigram[1]

    words = []
    while True:
        transitions = markovDict[utils.lowerWordOrList(prev)]
        denominator = transitions[0][1][1]
        rnd = random.randint(1, denominator)

        # Pick the first transition whose cumulative numerator reaches rnd.
        # NOTE(review): nextWord stays None if the numerators sum to less
        # than the denominator -- presumably the input never does that.
        total = 0
        nextWord = None
        for transition in transitions:
            total += transition[1][0]
            if total >= rnd:
                nextWord = transition[0]
                break

        # Drawing the start symbol again marks the end of the text.
        if nextWord == config.startSymbol:
            break

        words.append(nextWord)

        if wordsPerState == 1:
            prev = nextWord
        else:
            # Slide the two-word window forward by one word.
            prev = (prev[1], nextWord)

    return words
def decodeWordToBitsRange(word, previousWord, markovChainDict, maxDigits, bitsRange):
    """Return the bits range associated with ``word`` after ``previousWord``.

    Args:
        word: the word to look up; compared through ``utils.lowerWord``.
        previousWord: state preceding ``word``; normalised with
            ``utils.lowerWordOrList`` before being used as a key.
        markovChainDict: mapping from normalised state to an entry whose
            element ``[1]`` holds the word probabilities for that state.
        maxDigits: forwarded unchanged to ``utils.computeWordRanges``.
        bitsRange: forwarded unchanged to ``utils.computeWordRanges``.

    Returns:
        Element ``[1]`` of the first computed word range whose element ``[0]``
        equals ``word`` after lower-casing.

    Raises:
        KeyError: if ``previousWord`` is not a key of ``markovChainDict``.
        IndexError: if none of the computed ranges matches ``word``.
    """
    # get probabilities for the words that can follow previousWord
    wordProbs = markovChainDict[utils.lowerWordOrList(previousWord)][1]

    # get the range covered by every word
    wordRanges = utils.computeWordRanges(bitsRange, wordProbs, maxDigits)

    # Build a real list: on Python 3 ``filter`` returns an iterator, which
    # cannot be subscripted.  The target is normalised once, outside the loop.
    target = utils.lowerWord(word)
    matches = [wr for wr in wordRanges if utils.lowerWord(wr[0]) == target]

    return matches[0][1]
def encodeBitsToWord(bitsField, bitsRange, startWord, markovChainDict):
    """Select the word-range entry whose bounds enclose the leading bits.

    The candidate ranges come from ``utils.computeWordRanges`` applied to the
    probabilities stored for ``startWord``.  As many leading bits as the lower
    bound of the first range is long are taken from ``bitsField`` and tested
    against every range with ``utils.binaryLowerEqualThan``.

    Returns:
        The first matching entry of the computed ranges (the whole entry, not
        only its word).

    Raises:
        IndexError: if no range encloses the bits.
    """
    # probabilities of the words that may follow the start word
    stateKey = utils.lowerWordOrList(startWord)
    wordProbs = markovChainDict[stateKey][1]

    # range covered by every candidate word
    fieldLen = bitsField.totalFieldLen()
    wordRanges = utils.computeWordRanges(bitsRange, wordProbs, fieldLen)

    # The bounds of the first range fix how many bits are compared.
    firstLowerBound = wordRanges[0][1][0]
    bits = bitsField.getFirstNBits(len(firstLowerBound))

    # keep every entry with lower bound <= bits <= upper bound
    enclosing = [
        entry for entry in wordRanges
        if utils.binaryLowerEqualThan(entry[1][0], bits)
        and utils.binaryLowerEqualThan(bits, entry[1][1])
    ]

    return enclosing[0]
def encodeBitsToWord(bitsField, bitsRange, startWord, markovChainDict):
    """Return the word-range entry whose bounds enclose the leading bits.

    Args:
        bitsField: object providing ``totalFieldLen()`` and
            ``getFirstNBits(n)``.
        bitsRange: forwarded unchanged to ``utils.computeWordRanges``.
        startWord: current state; normalised with ``utils.lowerWordOrList``
            before being used as a key.
        markovChainDict: mapping from normalised state to an entry whose
            element ``[1]`` holds the word probabilities for that state.

    Returns:
        The first entry ``wr`` of the computed ranges for which
        ``wr[1][0] <= bits <= wr[1][1]`` according to
        ``utils.binaryLowerEqualThan`` (the whole entry, not only its word).

    Raises:
        KeyError: if ``startWord`` is not a key of ``markovChainDict``.
        IndexError: if no range encloses the bits.
    """
    # get probabilities for the start word
    wordProbs = markovChainDict[utils.lowerWordOrList(startWord)][1]

    # get the range covered by every word
    wordRanges = utils.computeWordRanges(bitsRange, wordProbs,
                                         bitsField.totalFieldLen())

    # look for the right partition for the bits; the length of the first
    # lower bound decides how many bits are read from the field
    precision = len(wordRanges[0][1][0])
    bits = bitsField.getFirstNBits(precision)

    # Build a real list: on Python 3 ``filter`` returns an iterator, which
    # cannot be subscripted.
    bestWord = [
        wr for wr in wordRanges
        if utils.binaryLowerEqualThan(wr[1][0], bits)
        and utils.binaryLowerEqualThan(bits, wr[1][1])
    ]

    return bestWord[0]