Exemplo n.º 1
0
    def __init__(self, y, typenum):
        """Store a line's words along with per-word type and syllable data.

        y is the array of word strings making up the line; typenum is the
        line's position within the haiku and is stored unchanged.
        """
        self.wordarray = y
        self.typenum = typenum

        # Build the two lookup lists in separate passes, each parallel to
        # wordarray: first the word types, then the syllable counts.
        word_types = []
        for word in y:
            word_types.append(dictionary.wordtype(word))
        self.typearray = word_types

        syllable_counts = []
        for word in y:
            syllable_counts.append(dictionary.syllablecnt(word))
        self.syllablearray = syllable_counts
Exemplo n.º 2
0
 def __init__(self, word):
     """Create the record for a single word.

     Attributes set here:
       word        -- the word itself
       syllables   -- number of syllables, from dictionary.syllablecnt
       wordtype    -- lexical category, from dictionary.wordtype
       is_word     -- whether True occurs anywhere in wordtype
       occurrences -- times the word has occurred in the training data
       adj_dict    -- maps an adjacent word to its adjacency coefficient
     """
     self.word = word

     # Dictionary lookups (syllables first, then lexical category).
     self.syllables = dictionary.syllablecnt(self.word)
     lexical_category = dictionary.wordtype(self.word)
     self.wordtype = lexical_category
     self.is_word = True in lexical_category

     # Training statistics start out empty.
     self.occurrences = 0
     self.adj_dict = dict()
Exemplo n.º 3
0
 def update(self, haiku, typenum):
     """Record one more occurrence of this word and refresh self.adj_dict.

     Every word in the first two lines of haiku that has the same word
     type as this one, and whose filtered form differs from self.word, is
     passed to self.update_adj_dict together with a flag telling whether
     its line index equals typenum.
     """
     # NOTE(review): only haiku.triple[0] and haiku.triple[1] are scanned;
     # confirm that leaving out a third line is intentional.
     self.occurrences += 1
     for line_index in (0, 1):
         line = haiku.triple[line_index]
         on_same_line = line_index == typenum
         for other in line.wordarray:
             same_category = self.wordtype == dictionary.wordtype(other)
             if same_category and dictionary.word_filter(other) != self.word:
                 self.update_adj_dict(other, on_same_line)
Exemplo n.º 4
0
def _bump_count(counts, key):
    """Increment counts[key], starting from zero when key is absent."""
    counts[key] = counts.get(key, 0) + 1


def train_haiku(haiku, monograms, bigrams, digrams, line_types):
    """Fold one new haiku into the training statistics.

    Args:
        haiku: object whose ``triple`` attribute iterates over its lines;
            each line exposes ``wordarray`` (the raw words) and ``typenum``.
        monograms: dict of Monogram objects keyed by filtered word.
        bigrams: dict of Bi_Gram objects keyed by (word_1, word_2) pairs of
            filtered words.
        digrams: dict of integer counts keyed by (word_1, word_2) pairs of
            filtered words; the newline string stands in for the start and
            the end of a line.
        line_types: dict of Line_type objects keyed by skeleton.

    Returns:
        None. All four dictionaries are updated in place.
    """
    for line in haiku.triple:
        words = line.wordarray
        # Filter each word once per line and reuse the result below.
        # This assumes dictionary.word_filter has no side effects.
        filtered = [dictionary.word_filter(raw) for raw in words]

        # Update line_types. The skeleton pairs each word's (type, syllable
        # count) with the line's position in the haiku.
        abstract_skeleton = (
            tuple((tuple(dictionary.wordtype(a)), dictionary.syllablecnt(a))
                  for a in words),
            line.typenum,
        )
        if abstract_skeleton in line_types:
            line_types[abstract_skeleton].update()
        else:
            abstrlin = Line_type(abstract_skeleton[0], abstract_skeleton[1])
            abstrlin.update()
            line_types[abstrlin.skeleton] = abstrlin

        # Update individual monograms (only for entries that are real words).
        for w in filtered:
            if not dictionary.is_word(w):
                continue
            if w in monograms:
                monograms[w].update(haiku, line.typenum)
            else:
                new_mono = Monogram(w)
                new_mono.update(haiku, line.typenum)
                monograms[w] = new_mono

        # Update bigrams and digrams.
        last_index = len(filtered) - 1
        for i, w_1 in enumerate(filtered):
            if i == last_index:
                # End-of-line digram. The membership test used to look up
                # (w, "/n") while storing under (w, "\n"), so this count
                # was reset to 1 every time instead of accumulating.
                _bump_count(digrams, (w_1, "\n"))
                continue

            w_2 = filtered[i + 1]

            if i == 0:
                # Start-of-line digram.
                # NOTE(review): a single-word line never reaches this
                # branch, so it records no start-of-line digram; behaviour
                # kept as in the original -- confirm whether intended.
                _bump_count(digrams, ("\n", w_1))

            _bump_count(digrams, (w_1, w_2))

            if dictionary.is_word(w_1) and dictionary.is_word(w_2):
                if (w_1, w_2) in bigrams:
                    bigrams[(w_1, w_2)].update()
                else:
                    new_bi = Bi_Gram(w_1, w_2)
                    new_bi.update()
                    bigrams[(w_1, w_2)] = new_bi