def addBigramToDict(word1, word2):
    """Record ``word2`` as a follower of ``word1`` in ``bigramsDict``.

    ``bigramsDict`` is a free variable resolved from the surrounding scope.
    Entries are keyed by ``utils.lowerWordOrList(word1)`` and hold a tuple
    ``(word1 as first seen, [followers...])``.
    """
    key = utils.lowerWordOrList(word1)

    # First occurrence of this key: remember the original spelling of
    # word1 and start a new follower list.
    if key not in bigramsDict:
        bigramsDict[key] = (word1, [word2])
        return

    # Known key: the stored first element is kept; only the follower list
    # grows (duplicates are appended as well).
    _, followers = bigramsDict[key]
    followers.append(word2)
Exemplo n.º 2
0
	def addBigramToDict(word1, word2):
		"""Append ``word2`` to the followers stored for ``word1``.

		The lookup key is ``utils.lowerWordOrList(word1)``; the value kept in
		``bigramsDict`` (taken from the enclosing scope) is a tuple of the
		first-seen ``word1`` and the list of every ``word2`` recorded for it.
		"""
		normalized = utils.lowerWordOrList(word1)

		if normalized not in bigramsDict:
			# New key: keep the original form of word1 next to its followers.
			bigramsDict[normalized] = (word1, [word2])
		else:
			# Existing key: extend the follower list in place.
			_, successors = bigramsDict[normalized]
			successors.append(word2)
def generateTextUsingMarkovChain(inputMarkov, wordsPerState):
    """Generate a list of words by randomly walking a Markov chain from JSON.

    The decoded JSON is iterated as pairs ``[state, transitions]``. Each
    transition is indexed below as ``[word, [count, total]]`` (shape inferred
    from the indexing in this function -- confirm against the code that
    writes the file). The walk starts from ``config.startSymbol`` and ends
    when ``config.startSymbol`` is drawn again.

    Args:
        inputMarkov: path of the JSON file holding the chain.
        wordsPerState: number of words forming a state; must be 1 or 2.

    Returns:
        The generated words, in order, without the start symbol.

    Raises:
        ValueError: if ``wordsPerState`` is neither 1 nor 2.
        KeyError: if the walk reaches a state that is not in the chain.
    """
    # `with` closes the file even when reading or JSON parsing fails.
    with open(inputMarkov, 'r') as f:
        data = json.load(f)

    if wordsPerState == 1:
        prev = config.startSymbol
    elif wordsPerState == 2:
        prev = (config.startSymbol, config.startSymbol)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `prev`.
        raise ValueError(
            "wordsPerState must be 1 or 2, got %r" % (wordsPerState,))

    # Index the transitions by their lower-cased state.
    markovDict = {
        utils.lowerWordOrList(utils.listToTuple(bigram[0])): bigram[1]
        for bigram in data
    }

    words = []
    while True:
        m = markovDict[utils.lowerWordOrList(prev)]
        # The upper bound of the draw is read from the first transition.
        denominator = m[0][1][1]
        rnd = random.randint(1, denominator)

        # Walk the cumulative counts until the drawn value is covered.
        # nextWord stays None if the counts never reach rnd.
        total = 0
        nextWord = None
        for word in m:
            total = total + word[1][0]
            if total >= rnd:
                nextWord = word[0]
                break

        # Drawing the start symbol ends the text.
        if nextWord == config.startSymbol:
            break

        words.append(nextWord)

        if wordsPerState == 1:
            prev = nextWord
        else:
            prev = (prev[1], nextWord)

    return words
Exemplo n.º 4
0
def decodeWordToBitsRange(word, previousWord, markovChainDict, maxDigits, bitsRange):
	"""Return the range stored for ``word`` among the successors of ``previousWord``.

	The candidates come from ``utils.computeWordRanges``; each one is indexed
	as ``(word, range)`` and is compared with ``word`` through
	``utils.lowerWord``.

	Raises:
		IndexError: if none of the candidates matches ``word``.
	"""
	# get probabilities for the start word
	wordProbs = markovChainDict[utils.lowerWordOrList(previousWord)][1]

	# get the range covered by every word
	wordRanges = utils.computeWordRanges(bitsRange, wordProbs, maxDigits)

	# A list comprehension is used instead of filter(): on Python 3 filter()
	# returns an iterator, which cannot be indexed.
	target = utils.lowerWord(word)
	bestRange = [wr for wr in wordRanges if utils.lowerWord(wr[0]) == target]

	return bestRange[0][1]
Exemplo n.º 5
0
def generateTextUsingMarkovChain(inputMarkov, wordsPerState):
	"""Produce a random word sequence from a Markov chain stored as JSON.

	The decoded JSON is iterated as pairs ``[state, transitions]``; every
	transition is indexed below as ``[word, [count, total]]`` (shape inferred
	from the indexing in this function -- confirm against the writer of the
	file). Generation starts at ``config.startSymbol`` and stops as soon as
	``config.startSymbol`` is drawn as the next word.

	Args:
		inputMarkov: path of the JSON file holding the chain.
		wordsPerState: how many words make up a state; must be 1 or 2.

	Returns:
		The list of generated words, without the start symbol.

	Raises:
		ValueError: if ``wordsPerState`` is neither 1 nor 2.
		KeyError: if the walk reaches a state that is not in the chain.
	"""
	# The context manager closes the file even if reading or parsing fails.
	with open(inputMarkov, 'r') as f:
		data = json.load(f)

	if wordsPerState == 1:
		prev = config.startSymbol
	elif wordsPerState == 2:
		prev = (config.startSymbol, config.startSymbol)
	else:
		# Fail early and clearly instead of hitting an UnboundLocalError
		# on `prev` further down.
		raise ValueError(
			"wordsPerState must be 1 or 2, got %r" % (wordsPerState,))

	# Index the transitions by their lower-cased state.
	markovDict = {
		utils.lowerWordOrList(utils.listToTuple(bigram[0])): bigram[1]
		for bigram in data
	}

	words = []
	while True:
		m = markovDict[utils.lowerWordOrList(prev)]
		# The upper bound of the draw is read from the first transition.
		denominator = m[0][1][1]
		rnd = random.randint(1, denominator)

		# Accumulate counts until the drawn value is covered.
		# nextWord stays None if the counts never reach rnd.
		total = 0
		nextWord = None
		for word in m:
			total = total + word[1][0]
			if total >= rnd:
				nextWord = word[0]
				break

		# Drawing the start symbol ends the text.
		if nextWord == config.startSymbol:
			break

		words.append(nextWord)

		if wordsPerState == 1:
			prev = nextWord
		else:
			prev = (prev[1], nextWord)

	return words
def decodeWordToBitsRange(word, previousWord, markovChainDict, maxDigits,
                          bitsRange):
    """Look up the range associated with ``word`` after ``previousWord``.

    ``utils.computeWordRanges`` supplies the candidates, each indexed as
    ``(word, range)``. The first candidate whose word equals ``word`` under
    ``utils.lowerWord`` wins and its range is returned.

    Raises:
        IndexError: if no candidate matches ``word``.
    """
    # get probabilities for the start word
    wordProbs = markovChainDict[utils.lowerWordOrList(previousWord)][1]

    # get the range covered by every word
    wordRanges = utils.computeWordRanges(bitsRange, wordProbs, maxDigits)

    # filter() returns a non-subscriptable iterator on Python 3, so the
    # matches are collected into a real list before indexing.
    target = utils.lowerWord(word)
    bestRange = [wr for wr in wordRanges if utils.lowerWord(wr[0]) == target]

    return bestRange[0][1]
def encodeBitsToWord(bitsField, bitsRange, startWord, markovChainDict):
    """Pick the word-range entry whose bounds enclose the next bits.

    Candidate entries come from ``utils.computeWordRanges`` and are indexed
    as ``(word, (low, high))``. The leading bits of ``bitsField`` are read
    with the same length as the first candidate's lower bound, and the first
    entry satisfying ``low <= bits <= high`` (per
    ``utils.binaryLowerEqualThan``) is returned in full.

    Raises:
        IndexError: if there are no candidates or none encloses the bits.
    """
    # Probabilities of the words that may follow the start word.
    successorProbs = markovChainDict[utils.lowerWordOrList(startWord)][1]

    # Range covered by every candidate word.
    fieldLen = bitsField.totalFieldLen()
    candidates = utils.computeWordRanges(bitsRange, successorProbs, fieldLen)

    # Read as many bits as the range bounds are long.
    boundLen = len(candidates[0][1][0])
    prefix = bitsField.getFirstNBits(boundLen)

    enclosing = [
        entry for entry in candidates
        if utils.binaryLowerEqualThan(entry[1][0], prefix)
        and utils.binaryLowerEqualThan(prefix, entry[1][1])
    ]
    return enclosing[0]
Exemplo n.º 8
0
def encodeBitsToWord(bitsField, bitsRange, startWord, markovChainDict):
	"""Return the word-range entry whose bounds enclose the next bits.

	Candidates are produced by ``utils.computeWordRanges`` and indexed as
	``(word, (low, high))``. The first entry for which
	``low <= bits <= high`` holds (per ``utils.binaryLowerEqualThan``) is
	returned as a whole.

	Raises:
		IndexError: if there are no candidates or none encloses the bits.
	"""
	# get probabilities for the start word
	wordProbs = markovChainDict[utils.lowerWordOrList(startWord)][1]

	# get the range covered by every word
	wordRanges = utils.computeWordRanges(bitsRange, wordProbs, bitsField.totalFieldLen())

	# look for the right partition for the bits
	precision = len(wordRanges[0][1][0])
	bits = bitsField.getFirstNBits(precision)

	# Built as a list (not a bare filter()) so it can be indexed on
	# Python 3, where filter() returns an iterator.
	bestWord = [
		wr for wr in wordRanges
		if utils.binaryLowerEqualThan(wr[1][0], bits)
		and utils.binaryLowerEqualThan(bits, wr[1][1])
	]

	return bestWord[0]