示例#1
0
    def walk(self, num_words):
        """Return a space-separated string of words sampled from the histogram.

        A ``Dictogram`` is built from ``self.word_list`` and every word is drawn
        from it.  ``self.markov_chain`` is consulted only for its length: an
        extra word is drawn on a pass only if the chain is non-empty.

        Args:
            num_words: Requested number of words.  The first word is always
                drawn, so the result holds at least one word.

        Returns:
            The sampled words joined with single spaces.
        """
        word_histogram = Dictogram(self.word_list)

        # The opening word is drawn unconditionally.
        chosen = [word_histogram.sample()]

        # The chain-size check runs once per pass, right before each draw.
        chosen.extend(
            word_histogram.sample()
            for _ in range(num_words - 1)
            if len(self.markov_chain) > 0
        )

        return ' '.join(chosen)
示例#2
0
def walk(word_list, length):
    """Return a list of words produced by repeatedly sampling follow-up chains.

    The first word comes from a ``Dictogram`` of the whole ``word_list``.
    Every later word is sampled from ``new_chain(word_list, <previous word>)``
    (presumably the histogram of words following it; confirm against
    ``new_chain``).

    Args:
        word_list: Source words.
        length: Maximum number of words to produce; at least one is returned.

    Returns:
        A list of sampled words.  It is shorter than ``length`` whenever a
        pass finds an empty chain, because that pass adds nothing.
    """
    current = Dictogram(word_list).sample()
    words = [current]

    for _ in range(length - 1):
        followers = new_chain(word_list, current)
        if len(followers) == 0:
            # Nothing follows the current word; this pass contributes no word.
            continue
        current = followers.sample()
        words.append(current)

    return words
示例#3
0
class MarkovChain():
    """Word-level Markov chain built from a list of words.

    ``nodes`` maps each distinct word to a ``Node`` that has been fed every
    word that directly follows it in the text; ``heads`` is a ``Dictogram``
    of the whole text used to pick the first word of a sentence.
    """

    def __init__(self, text):
        """Build the node table and the start-word histogram from ``text``."""
        self.nodes = self.generate_nodes(text)
        self.heads = Dictogram(text)

    def generate_nodes(self, text):
        '''Create one node per distinct word and record each word's successor.

        Args:
            text: Indexable sequence of words.

        Returns:
            A dict mapping each word to its ``Node``.  The final word of the
            text has no successor, so nothing is recorded for that occurrence.
        '''
        nodes = {}
        last_index = len(text) - 1

        for index, this_word in enumerate(text):
            if this_word not in nodes:
                nodes[this_word] = Node(this_word)

            # Only positions before the last one have a following word.
            if index < last_index:
                nodes[this_word].add_count(text[index + 1])

        return nodes

    def generate_sentence(self, num_words):
        '''Generate a sentence of at most ``num_words`` space-separated words.

        The walk starts from a word sampled out of ``self.heads`` and stops
        early when the current node reports ``types == 0`` (treated here as a
        node with nowhere to go).

        Args:
            num_words: Maximum number of words in the sentence.

        Returns:
            The words joined by single spaces, with no trailing space.
        '''
        words = []
        this_word = self.heads.sample()  # lead word for the sentence

        for position in range(num_words):
            words.append(this_word)

            if self.nodes[this_word].types == 0:  # end node: cannot continue
                break

            # Only step forward when another word is still needed; walking
            # after the final word would just discard a sample.
            if position < num_words - 1:
                this_word = self.nodes[this_word].walk()

        # Joining avoids the trailing space that per-word concatenation left.
        return ' '.join(words)
def random_walk(word_list, length):
    """Build a word list by sampling a start word and then chained follow-ups.

    The opening word is sampled from a ``Dictogram`` of ``word_list``.  Each
    further step asks ``new_chain`` for the chain of the most recently added
    word and, if that chain is non-empty, samples the next word from it.

    Returns a list holding between one and ``length`` words.
    """
    picked = [Dictogram(word_list).sample()]

    steps_left = length - 1
    while steps_left > 0:
        steps_left -= 1
        # The last appended word is always the one the walk continues from.
        options = new_chain(word_list, picked[-1])
        if len(options) > 0:
            picked.append(options.sample())

    return picked
    def sample(self):
        """Return up to ``self.order`` chained words joined by single spaces.

        The first word is sampled from a ``Dictogram`` of ``self.word_list``;
        each following word is sampled from ``self.next_chain`` of the word
        before it.  A pass that finds an empty chain adds nothing.
        """
        current = Dictogram(self.word_list).sample()
        collected = [current]
        followers = self.next_chain(current)

        for _ in range(self.order - 1):
            if len(followers) == 0:
                # No follower recorded for the current word on this pass.
                continue
            current = followers.sample()
            collected.append(current)
            followers = self.next_chain(current)

        return " ".join(collected)
class Markov_Chain(dict):
    """N-th order Markov chain stored as a dict.

    Each key is a state: a tuple of ``nth_order`` consecutive words.  Each
    value is a ``Dictogram`` of the states that follow it, i.e. the same
    window shifted one word to the right.
    """

    def __init__(self, word_list, nth_order=1):
        """Build the chain from ``word_list``.

        Args:
            word_list: Raw input handed to ``create_list`` to obtain the word
                sequence.
            nth_order: Number of words per state; expected to be >= 1.  With
                ``nth_order=2`` the keys and values are word pairs.
        """
        self.word_list = create_list(word_list)
        self.dictionary_histogram = Dictogram(self.word_list)
        self.nth_order = nth_order

        # A state starting at `index` needs nth_order words plus one more for
        # its successor, so the last usable index is len - nth_order - 1.
        for index in range(len(self.word_list) - nth_order):
            state = tuple(self.word_list[index:index + nth_order])
            next_state = tuple(
                self.word_list[index + 1:index + nth_order + 1])

            if state not in self:
                self[state] = Dictogram([next_state])
            else:
                self[state].add_count(next_state)

    def creating_sentence(self, length=10):
        """Create a sentence of at most ``length`` words from the chain.

        The first word is sampled from ``self.dictionary_histogram``.  The
        walk then opens from a state beginning with that word, chosen
        uniformly among the distinct matching states, and follows sampled
        successor states, emitting the last word of each.  It stops early
        when the current state has no recorded successor.

        Args:
            length: Maximum number of words; at least one word is always
                emitted.

        Returns:
            The words, each followed by a single space.
        """
        import random  # local import: the file's import block is not visible here

        first_word = self.dictionary_histogram.sample()

        # Keys are word tuples, so the sampled word cannot index the chain
        # directly; find the states that begin with it instead.
        openings = [state for state in self if state[:1] == (first_word,)]
        if length <= 1 or not openings:
            return first_word + " "

        state = random.choice(openings)
        words = list(state[:length])

        while len(words) < length and state in self:
            state = self[state].sample()
            words.append(state[-1])  # only the newest word of the window is new

        return "".join(word + " " for word in words)
示例#7
0
def walk(word_list, amount):
    '''Produce a list of sampled words, each drawn from the previous word's chain.

    The opening word is sampled from a Dictogram of the whole word list.
    Every following pass fetches next_chain(word_list, <latest word>) and,
    when that chain is non-empty, samples the next word from it.

    word_list = list
    amount = int (maximum number of words; at least one is returned)
    '''
    latest = Dictogram(word_list).sample()
    sentence = [latest]

    passes = (amount) - 1
    for _ in range(passes):
        followers = next_chain(word_list, latest)
        if not len(followers) > 0:
            # An empty chain leaves the sentence unchanged for this pass.
            continue
        latest = followers.sample()
        sentence += [latest]

    return sentence
示例#8
0
def test_sample():
    """Check that sampling reproduces each word's frequency within +/-10%.

    Draws 10,000 samples from a Dictogram of ``fish_words``, counts them in a
    second Dictogram, and compares every word's sampled share with its share
    in the original histogram.
    """
    dictogram = Dictogram(fish_words)

    # Collect 10,000 draws, then tally them.
    draws = []
    for _ in range(10000):
        draws.append(dictogram.sample())
    drawn_hist = Dictogram(draws)

    for word, count in dictogram.dictionary_histogram.items():
        # Share of this word in the source histogram.
        expected = count / dictogram.tokens
        # Share of this word among the draws.
        actual = drawn_hist.frequency(word) / drawn_hist.tokens
        # Accept anything between 90% and 110% of the expected share.
        floor = expected * 0.9
        ceiling = expected * 1.1
        assert floor <= actual <= ceiling
示例#9
0
class Markov_Chain(dict):
    """First-order Markov chain stored as a dict.

    Each key is a word and each value is a ``Dictogram`` of the words that
    directly follow it in the source word list.
    """

    def __init__(self, word_list):
        """Build the chain and the overall word histogram from ``word_list``.

        Args:
            word_list: Sliceable sequence of words.  The final word has no
                follower, so it gets a key only if it also occurs earlier.
        """
        self.word_list = word_list
        self.dictionary_histogram = Dictogram(self.word_list)

        # Pair every word with its successor; the last word has none.
        for word, next_word in zip(self.word_list, self.word_list[1:]):
            if word not in self:
                self[word] = Dictogram([next_word])
            else:
                self[word].add_count(next_word)

    def creating_sentence(self, length=10):
        """Create a sentence of at most ``length`` words from the chain.

        The first word is sampled from ``self.dictionary_histogram``; each
        following word is sampled from the chain entry of the word before it.
        The walk stops early when the current word has no entry in the chain
        (e.g. a word that only occurs at the very end of the source list).

        Args:
            length: Maximum number of words; at least one word is always
                emitted.

        Returns:
            The words, each followed by a single space.
        """
        words = [self.dictionary_histogram.sample()]

        # Guard the lookup: a word without followers is a dead end, not an error.
        while len(words) < length and words[-1] in self:
            words.append(self[words[-1]].sample())

        return "".join(word + " " for word in words)
def order_sample(word_list, order=2):
    """Return up to ``order`` chained words joined by single spaces.

    A first word is sampled from a ``Dictogram`` of ``word_list``.  Each
    following word is sampled from ``new_chain(word_list, <previous word>)``;
    a pass that finds an empty chain adds nothing.
    """
    histogram = Dictogram(word_list)

    # Seed the walk with a random word and look up what can follow it.
    current = histogram.sample()
    followers = new_chain(word_list, current)
    picked = [current]

    for _ in range(order - 1):
        if len(followers) == 0:
            continue
        current = followers.sample()
        picked.append(current)
        followers = new_chain(word_list, current)

    return " ".join(picked)
示例#11
0
class MarkovChain():
    """Phrase-level Markov chain whose states are tuples of ``order`` tokens.

    ``nodes`` maps each phrase to a ``Node`` fed with the phrases that follow
    it; ``starttokens`` counts the phrases that open a sentence.
    """

    def __init__(self, order=2, starttoken='!START', stoptoken='!STOP'):
        """Create an empty chain.

        Args:
            order: Number of tokens per phrase.
            starttoken: Marker token that opens a sentence in the input.
            stoptoken: Marker token that closes a sentence in the input.
        """
        self.order = order  # number of tokens per phrase

        self.nodes = dict()
        self.starttokens = Dictogram()
        self.stoptokens = Dictogram()

        self.STARTTOKEN = starttoken
        self.STOPTOKEN = stoptoken

    def get_phrase(self, text_q, count_start=True):
        """Return the next ``order`` tokens of ``text_q`` as a tuple.

        The tokens are dequeued from a ``copy.copy`` of the queue, not from
        ``text_q`` itself.

        Args:
            text_q: Queue of tokens offering ``dequeue()``.
            count_start: When true (the default), a phrase that begins with
                the start token is recorded in ``self.starttokens``.

        Returns:
            The phrase as a tuple of tokens.
        """
        this_q = copy.copy(text_q)
        phrase = tuple(this_q.dequeue() for _ in range(self.order))

        # Only a phrase that *begins* with the start token opens a sentence;
        # one that merely contains it (e.g. stop token, start token) straddles
        # a sentence boundary and must not be offered as an opening.
        if count_start and phrase[:1] == (self.STARTTOKEN,):
            self.starttokens.add_count(phrase)
        return phrase

    def gen_nodes(self, text):
        '''Iterate across the tokens of ``text``, creating or updating nodes.'''
        text_q = Queue()
        for token in text:
            text_q.enqueue(token)

        # Each pass needs the current phrase plus one more token for its successor.
        while text_q.length() > self.order:
            this_phrase = self.get_phrase(text_q)
            text_q.dequeue()
            # The successor becomes `this_phrase` on the next pass and is
            # counted then; counting it here as well would double-count it.
            next_phrase = self.get_phrase(text_q, count_start=False)

            if this_phrase not in self.nodes:
                self.nodes[this_phrase] = Node(this_phrase)
            if next_phrase:
                self.nodes[this_phrase].add_count(next_phrase)

    def get_start(self):
        """Return the phrase a generated sentence starts from.

        For order 1 this is a walk from the start-token node; otherwise it is
        sampled from ``self.starttokens``.
        """
        if self.order == 1:
            return self.nodes[(self.STARTTOKEN,)].walk()
        return self.starttokens.sample()

    def gen_sentence(self):
        '''Generate a sentence that runs from a start phrase to the stop token.

        Returns:
            The generated words, each followed by a single space; start and
            stop tokens are not included.

        Raises:
            KeyError: If the walk reaches a phrase that has no node.
        '''
        this_phrase = self.get_start()

        # From the opening phrase keep every word after the start token.  When
        # the phrase holds no start token (order 1) keep its last word only.
        if self.STARTTOKEN in this_phrase:
            first_visible = this_phrase.index(self.STARTTOKEN) + 1
        else:
            first_visible = self.order - 1
        words = list(this_phrase[first_visible:])

        while self.STOPTOKEN not in this_phrase:
            this_phrase = self.nodes[this_phrase].walk()
            # Consecutive phrases overlap, so only the last token is new.
            words.append(this_phrase[-1])

        # Drop the stop token (and anything after it) from the output.
        if self.STOPTOKEN in words:
            words = words[:words.index(self.STOPTOKEN)]

        return ''.join(word + ' ' for word in words)