Exemplo n.º 1
0
class MangleVerbs(object):
    """Reduce a verb list to the verbs the syllable counter scores as 1.

    Verbs that also appear in a list of common words are dropped.
    """

    def __init__(self):
        # Syllable counter; filterVerbs() calls its count() and oldCount().
        self.syllables = CountSyllables()

    def filterVerbs(self, verbsPath="verbs.txt", dictPath="dict.txt",
                    outputPath="output.txt"):
        """Write the accepted verbs to ``outputPath``, one per line.

        Each line of ``verbsPath`` is stripped and lower-cased, then scored
        with ``self.syllables.count``; a score of -1 is re-scored with
        ``self.syllables.oldCount``. A verb is written when its score is
        exactly 1 and it does not appear in ``dictPath``. Every decision is
        echoed to stdout as "added" or "killed".

        Args:
            verbsPath: File with one candidate verb per line.
            dictPath: File with one common word per line. Lines are compared
                verbatim (they are neither stripped nor lower-cased).
            outputPath: File that receives the accepted verbs; it is
                truncated if it already exists.

        Raises:
            IOError/OSError: If any of the files cannot be opened.
        """
        # Context managers close every handle even if reading, counting or
        # writing raises part-way through.
        with open(verbsPath, "r") as verbsFile:
            verbs = verbsFile.read().split("\n")

        with open(dictPath, "r") as dictFile:
            # A set makes the per-verb membership test O(1) instead of a
            # scan over the whole word list.
            commonWords = set(dictFile.read().split("\n"))

        with open(outputPath, "w") as output:
            for word in verbs:
                word = word.strip().lower()
                number = self.syllables.count(word)

                # count() returning -1 triggers the oldCount() fallback
                # (presumably -1 marks a word count() cannot score).
                if number == -1:
                    number = self.syllables.oldCount(word)

                if number != 1:
                    continue

                # The single-argument print form emits the same text on
                # Python 2 and Python 3 (two spaces after the colon, as the
                # original ``print "killed: ", word`` produced).
                if word in commonWords:
                    print("killed:  %s" % word)
                    continue

                output.write(word + "\n")
                print("added:  %s" % word)
class Lawrence(object):
    """Per-sentence readability scores for a text, plus summary statistics.

    ``loopSentence`` fills ``self.instances`` with one score per sentence;
    ``mean``, ``ss`` and ``stdev`` summarise whatever that list holds.
    """

    def __init__(self):
        # Syllable counter used by loopSentence().
        self.cs = CountSyllables()
        # One readability score per sentence of the last analysed text.
        self.instances = []

    def ss(self):
        """Return the sum of squared deviations of the scores from their mean.

        Returns 0 when no scores are recorded.
        """
        centre = self.mean()
        return sum((x - centre) ** 2 for x in self.instances)

    def stdev(self):
        """Return the population standard deviation of the scores.

        The sum of squares is divided by ``n`` (not ``n - 1``). Returns 0
        when fewer than two scores are recorded.
        """
        n = len(self.instances)
        if n < 2:
            return 0
        return (self.ss() / n) ** 0.5

    def mean(self):
        """Return the arithmetic mean of the scores as a float.

        Returns 0.0 when no scores are recorded instead of dividing by zero.
        """
        if not self.instances:
            return 0.0
        # The 0.0 start value keeps the division a float division on
        # Python 2 even when every score is an int.
        return sum(self.instances, 0.0) / len(self.instances)

    def fleschKincaid(self, wordCount, syllableCount):
        """Score a single sentence.

        Computes ``206.835 - 1.015 * wordCount - 84.6 * syllableCount /
        wordCount``. These are the constants of the Flesch Reading Ease
        formula with the words-per-sentence term taken for one sentence;
        despite the method name this is not the Flesch-Kincaid grade level.

        Raises:
            ZeroDivisionError: If ``wordCount`` is 0.
        """
        return 206.835 - 1.015 * wordCount - (84.6 * syllableCount) / wordCount

    def _syllablesIn(self, word):
        """Return the syllable count of ``word``, retrying with oldCount().

        Mirrors MangleVerbs.filterVerbs: a -1 from count() is replaced by
        the oldCount() result rather than being added to a total.
        """
        number = self.cs.count(word)
        if number == -1:
            number = self.cs.oldCount(word)
        return number

    def loopSentence(self, text):
        """Replace ``self.instances`` with one score per sentence of ``text``.

        The text is lower-cased and split on '.'; each piece is split on
        whitespace. Pieces that contain no words (empty or whitespace-only)
        are skipped, so the space after a full stop no longer counts as an
        extra empty word.
        """
        self.instances = []
        for sentence in text.lower().split('.'):
            words = sentence.split()
            if not words:
                continue
            syllableCount = sum(self._syllablesIn(word) for word in words)
            self.instances.append(
                self.fleschKincaid(len(words), syllableCount))
class SyllableIt(TagIt):
    """Extract syllable counts for the words on either side of a key word."""

    def __init__(self):
        # NOTE(review): TagIt.__init__ is not called here; confirm the base
        # class needs no initialisation before tagSentence() is used.
        self.syllable = CountSyllables()

    def addTags(self, sentence=("here", "is", "a", "word"), factors=5):
        """Return a new list: ``sentence`` with "na" padding on both sides.

        ``factors`` copies of the string "na" are placed before and after
        the words:

            ["I", "run"], 1 -----> ["na", "I", "run", "na"]

        The input sequence is left untouched (the previous implementation
        extended the caller's list, and its own default, in place).
        """
        padding = ["na"] * factors
        return padding + list(sentence) + padding

    def extractTags(self, sentence, keys, factors):
        """Return syllable counts for the words around the first key found.

        For the first item of ``sentence`` that is in ``keys``, the
        ``factors`` items before it and the ``factors`` items after it are
        passed to ``getSyllable`` and the results returned as strings, in
        sentence order. The key itself is not included, and padding items
        are counted like any other word. Returns an empty list when no key
        is present.

        ``sentence`` must carry at least ``factors`` items on each side of
        the key (as produced by ``addTags``): otherwise the "before" indices
        go negative and wrap to the end of the list, and the "after"
        indices raise IndexError.
        """
        for position, word in enumerate(sentence):
            if word not in keys:
                continue
            before = [sentence[position - factors + offset]
                      for offset in range(factors)]
            after = [sentence[position + offset]
                     for offset in range(1, factors + 1)]
            return [str(self.getSyllable(item)) for item in before + after]
        return []

    def processWords(self,
                     sentence="I can not take it much farther.",
                     keys=("further", "farther"),
                     factors=5):
        """Return the syllable counts surrounding the first key in a sentence.

        The sentence is tagged, reduced to its lower-cased words, padded
        with ``factors`` "na" items per side and handed to ``extractTags``.
        """
        # tagSentence() is not defined in this class (presumably inherited
        # from TagIt); only element 0 of each item it yields is used. The
        # original-case sentence is tagged and the words are lower-cased
        # afterwards so they can match the lower-case keys; previously the
        # lower-cased split was computed and then thrown away.
        words = [tagged[0].lower() for tagged in self.tagSentence(sentence)]
        padded = self.addTags(words, factors)
        return self.extractTags(padded, keys, factors)

    def getSyllable(self, word):
        """Return ``self.syllable.count(word)``."""
        return self.syllable.count(word)
 def __init__(self):
     # Stores a new CountSyllables instance on self.syllable.
     # NOTE(review): this fragment repeats SyllableIt.__init__ verbatim and
     # has no visible enclosing class; it looks like extraction residue.
     # Confirm and remove.
     self.syllable = CountSyllables()
        def __init__(self):
		# Stores a new CountSyllables instance on self.syllables.
		# NOTE(review): this fragment repeats MangleVerbs.__init__ and has
		# no visible enclosing class; it looks like extraction residue.
		# Confirm and remove.
		self.syllables = CountSyllables()
        def __init__(self):
		# Stores a new CountSyllables instance on self.cs and starts with an
		# empty self.instances list.
		# NOTE(review): this fragment repeats Lawrence.__init__ and has no
		# visible enclosing class; it looks like extraction residue.
		# Confirm and remove.
		self.cs = CountSyllables()
                self.instances = []