Пример #1
0
def sentence_length(pylinguistObj):
    """Return the number of tokens in each sentence of ``pylinguistObj``.

    Every entry of ``pylinguistObj.tokenized_sentences`` is tokenized with
    ``tools.getTokens``.  That helper is handed the object itself, so each
    sentence is temporarily stored on ``pylinguistObj.text`` for the call.
    The text that was there on entry is put back before returning, so the
    caller's ``text`` attribute is not left holding the last sentence.

    :param pylinguistObj: object exposing ``text`` and
        ``tokenized_sentences``.
    :return: list of ints, one per sentence, in sentence order.
    """
    original_text = pylinguistObj.text
    lengths = []
    try:
        for sentence in pylinguistObj.tokenized_sentences:
            pylinguistObj.text = sentence
            lengths.append(len(tools.getTokens(pylinguistObj)))
    finally:
        # Restore the caller's text even if tokenization raises.
        pylinguistObj.text = original_text
    return lengths
Пример #2
0
def sentence_sized_30(pylinguistObj):
    """Return the percentage of sentences that have more than 30 tokens.

    Every entry of ``pylinguistObj.tokenized_sentences`` is tokenized with
    ``tools.getTokens``.  That helper is handed the object itself, so each
    sentence is temporarily stored on ``pylinguistObj.text`` for the call;
    the text that was there on entry is restored before returning.

    :param pylinguistObj: object exposing ``text`` and
        ``tokenized_sentences``.
    :return: ``long_sentences * 100 / total_sentences``, or ``0`` when there
        are no sentences.  The division uses the interpreter's ``/`` on two
        ints, so it floors under Python 2 and is a true division under
        Python 3.
    """
    original_text = pylinguistObj.text
    total = 0
    long_sentences = 0
    try:
        for sentence in pylinguistObj.tokenized_sentences:
            pylinguistObj.text = sentence
            total += 1
            if len(tools.getTokens(pylinguistObj)) > 30:
                long_sentences += 1
    finally:
        # Restore the caller's text even if tokenization raises.
        pylinguistObj.text = original_text

    # No sentences: report 0% instead of dividing by zero.
    if total == 0:
        return 0
    return (long_sentences * 100) / total
Пример #3
0
def word_count(pylinguistObj):
    """Count the words in ``pylinguistObj``, excluding punctuation.

    The count is the number of entries in ``pylinguistObj.tokens`` minus the
    number of entries in ``pylinguistObj.postag`` whose tag (second element)
    is ``"."``.  The result is also stored on ``pylinguistObj.word_count``.

    :param pylinguistObj: object exposing ``tokens``, ``postag`` and
        ``text``; ``postag`` is iterated as ``(word, tag)`` pairs.
    :return: the word count as an int.
    """
    if pylinguistObj.tokens == []:
        # The other tools.getTokens call sites in this file pass the object
        # itself rather than its raw text, so do the same here.
        pylinguistObj.tokens = tools.getTokens(pylinguistObj)

    # Punctuation tokens are tagged "." and are not words.
    punctuation = sum(1 for tag in pylinguistObj.postag if tag[1] == ".")
    count = len(pylinguistObj.tokens) - punctuation

    pylinguistObj.word_count = count
    return count
Пример #4
0
    def text(self, text):
        """Load ``text`` into this object and recompute its cached analysis.

        Resets ``types``, ``tokens`` and ``postag`` to empty lists, stores
        the raw text on ``self.text``, then fills ``tokens`` and ``postag``
        from ``tools`` and the descriptive counts from ``descriptive``.
        Every helper is handed ``self``, so the order of the assignments
        below matters (presumably each helper reads attributes set by the
        earlier steps -- confirm before reordering).

        NOTE(review): unless ``text`` is declared as a property/descriptor
        outside this view, the ``self.text = text`` assignment replaces this
        method on the instance, so calling ``obj.text(...)`` a second time
        on the same object would fail -- confirm.

        :param text: the raw text to analyse.
        :return: ``self``.
        """
        # Clear any analysis left over from a previous text.
        self.types = []
        self.tokens = []
        self.postag = []

        self.text = text
        # (a former ``.decode('utf-8','ignore')`` on the text is disabled)
        self.tokens = tools.getTokens(self)

        # Optimization: tag once and keep the result on the instance
        # (presumably so later metrics can reuse it instead of re-tagging).
        self.postag = tools.getPosTag(self)

        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        # Disabled metrics, kept for reference.  The original note on
        # word_length reads: "Doesn't work, for some reason".
        #self.word_length = descriptive.word_length(self)
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self
Пример #5
0
    def text(self, text):
        """Load ``text`` into this object and recompute its cached analysis.

        Resets ``types``, ``tokens`` and ``postag`` to empty lists, stores
        the raw text on ``self.text``, then fills ``tokens`` and ``postag``
        from ``tools`` and the descriptive counts from ``descriptive``.
        Every helper is handed ``self``, so the order of the assignments
        below matters (presumably each helper reads attributes set by the
        earlier steps -- confirm before reordering).

        NOTE(review): unless ``text`` is declared as a property/descriptor
        outside this view, the ``self.text = text`` assignment replaces this
        method on the instance, so calling ``obj.text(...)`` a second time
        on the same object would fail -- confirm.

        :param text: the raw text to analyse.
        :return: ``self``.
        """
        # Clear any analysis left over from a previous text.
        self.types=[]
        self.tokens=[]
        self.postag=[]

        self.text = text
        # (a former ``.decode('utf-8','ignore')`` on the text is disabled)
        self.tokens = tools.getTokens(self)

        # Optimization: tag once and keep the result on the instance
        # (presumably so later metrics can reuse it instead of re-tagging).
        self.postag = tools.getPosTag(self)


        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        # Disabled metrics, kept for reference.  The original note on
        # word_length reads: "Doesn't work, for some reason".
        #self.word_length = descriptive.word_length(self)
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self
Пример #6
0
def orthographic_neighborhood(pylinguistObj, window=20):
    """Count nearby repetitions of the same word in ``pylinguistObj``.

    Words are taken from ``pylinguistObj.postag`` (iterated as
    ``(word, tag)`` pairs), skipping entries tagged ``"."`` (punctuation).
    Each word is then compared with the ``window - 1`` words that follow it,
    and every exact match adds one to the result.  Note that, despite the
    function's name, only exact equality between words is tested.

    :param pylinguistObj: object exposing ``tokens``, ``postag`` and
        ``text``.
    :param window: size of the comparison window including the word itself;
        the default of 20 compares each word with up to 19 following words.
        Values of 1 or less yield 0.
    :return: the number of matching word pairs as an int.
    """
    if pylinguistObj.tokens == []:
        # The other tools.getTokens call sites in this file pass the object
        # itself rather than its raw text, so do the same here.
        pylinguistObj.tokens = tools.getTokens(pylinguistObj)

    # Punctuation is tagged "." and is not a word.
    words = [tag[0] for tag in pylinguistObj.postag if tag[1] != "."]

    count = 0
    for index, word in enumerate(words):
        # Slicing clips at the end of the list, so no bounds check is needed.
        count += words[index + 1:index + window].count(word)
    return count