Пример #1
0
    def generateMargChain(self, key_word):
        """Build a Margarine chain of word ids around ``key_word``.

        Inspired by the open-source project Margarine, which uses a concept
        similar to a Markov chain. A MargChain grows in both directions from
        the keyword: forward until the boundary marker is reached, then
        backward until the boundary marker is reached.

        Args:
            key_word: Value matched against ``Markov.q.second_wordID`` to
                find the candidate starting trigrams.

        Returns:
            A list of word ids in sentence order (the backward walk reversed,
            followed by the forward walk), or ``None`` when ``key_word`` is
            falsy or no trigram has it as its second word.
        """
        if not key_word:
            return None
        base_margs = list(Markov.select(Markov.q.second_wordID == key_word))
        if not base_margs:
            return None

        boundary_id = 1   # id of the EOF marker word; also ends the backward walk
        max_half_len = 12  # upper bound on the length of each half

        # The starting trigram is picked at random for now.
        base = random.choice(base_margs)

        # Forward chain (keyword -> end).
        second_word = base.second_word.id
        third_word = base.third_word.id
        second_half = [second_word]
        while third_word != boundary_id and len(second_half) < max_half_len:
            second_half.append(third_word)
            # Shift the window one word forward.
            first_word = second_word
            second_word = third_word
            hits = list(Markov.select(
                AND(Markov.q.first_wordID == first_word,
                    Markov.q.second_wordID == second_word)))
            if not hits:
                # No stored trigram continues this pair: end the chain here
                # instead of failing with an IndexError.
                break
            choice = random.choice(hits)
            second_word = choice.second_word.id
            third_word = choice.third_word.id

        # Reverse chain (keyword -> start).
        first_half = []
        first_word = base.first_word.id
        second_word = base.second_word.id
        # The length bound mirrors the forward walk; without it, cyclic
        # trigram data would keep this loop running forever.
        while first_word != boundary_id and len(first_half) < max_half_len:
            first_half.append(first_word)
            hits = list(Markov.select(
                AND(Markov.q.second_wordID == first_word,
                    Markov.q.third_wordID == second_word)))
            if not hits:
                # No stored trigram precedes this pair: stop walking back.
                break
            choice = random.choice(hits)
            first_word = choice.first_word.id
            second_word = choice.second_word.id

        # Merge the halves: the backward walk was collected keyword-first.
        first_half.reverse()
        first_half.extend(second_half)
        return first_half
Пример #2
0
 def createMarkovChains(self, increment=False):
     """Store word-level and lexical trigram chains in the database.

     For every triple yielded by ``self._markovify()`` a ``Markov`` row is
     looked up by the ids of its three entries, and a ``MarkovLex`` row by
     the ids of their ``main_type``. When a matching row exists and
     ``increment`` equals ``True``, the first match is incremented; in
     every other case a new row is created.
     """
     for triple in self._markovify():
         # Word-level chain first.
         word_a, word_b, word_c = [item.id for item in triple]
         word_rows = list(Markov.select(AND(
             Markov.q.first_wordID == word_a,
             Markov.q.second_wordID == word_b,
             Markov.q.third_wordID == word_c)))
         if not (word_rows and increment == True):
             Markov(first_word=word_a, second_word=word_b, third_word=word_c)
         else:
             word_rows[0].increment()

         # Then the lexical chain, keyed on each entry's main type.
         lex_a, lex_b, lex_c = [item.main_type.id for item in triple]
         lex_rows = list(MarkovLex.select(AND(
             MarkovLex.q.first_lexID == lex_a,
             MarkovLex.q.second_lexID == lex_b,
             MarkovLex.q.third_lexID == lex_c)))
         if not (lex_rows and increment == True):
             MarkovLex(first_lex=lex_a, second_lex=lex_b, third_lex=lex_c)
         else:
             lex_rows[0].increment()
Пример #3
0
def main():
    """Interactively summarise a web page or a text file.

    Prompts for the input source (1 = URL, 2 = file name) and a population
    size, analyses every sentence of the text, writes the word and grammar
    graphs as dot files, then prints the result of ``markov.select`` on the
    generated population. Exits with status 2 when the chosen option is
    neither 1 nor 2.
    """
    try:
        option = int(raw_input("1. URL, 2. plain text:"))
    except ValueError:
        # Non-numeric input is handled like any other invalid option below.
        option = None

    if option == 1:
        if DEBUG:
            url = "http://www.cs.drexel.edu/~jpopyack/Kasparov.html"
        else:
            url = raw_input("Enter the URL: ")
        text = summartUtil.getHtmlText(url)
    elif option == 2:
        filename = raw_input("Enter the filename: ")
        # The context manager closes the file even if reading fails.
        with open(filename) as fin:
            # A "\n" is appended to every line as read, exactly as before.
            text = "".join(line + "\n" for line in fin)
    else:
        print("Please enter the right option")
        sys.exit(2)

    max_gen = int(raw_input("Enter number of population: "))

    sentences = summartUtil.getSentences(text)
    print("Analyzing text...")
    for s in sentences:
        analyze_sentence(s)

    # Render the graphs with, e.g.:
    #   dot -Tpng -o test.png test.dot
    outputDot("word.dot", word_dictionary)
    outputDot("grammer.dot", grammer_dictionary)

    markov = Markov(start_word, end_word, start_grammer, end_grammer)
    markov.set_original(text)
    markov.set_blackList(BLACK_LIST)

    print("Generating summary...")

    population = markov.GenerateSummary(max_gen)
    print(markov.select(population))
Пример #4
0
 def testMarkovChainOccurence(self):
     # The chain whose second word is 名前 should have an occurence of 3.
     word = Word.byAppeared_name('名前')
     hits = list(Markov.select(Markov.q.second_wordID == word.id))
     # Report a missing row as a test failure rather than an IndexError.
     self.assertTrue(hits, 'no Markov row found for the expected second word')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(hits[0].occurence, 3)
Пример #5
0
 def testMarkovChainDuplicates(self):
     # Exactly one chain should exist whose second word is 名前.
     word = Word.byAppeared_name('名前')
     matches = list(Markov.select(Markov.q.second_wordID == word.id))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(len(matches), 1)