def contentDensity(pylinguistObj):
    """Return the ratio of content words to all remaining tagged tokens.

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence whose items
            carry the part-of-speech tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        The number of tokens tagged ``VERB``, ``NOUN``, ``ADJ`` or ``ADV``
        divided by the number of all other tagged tokens (as a float), or
        ``0`` when there are no other tokens.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    content_words = sum(
        1 for tag in pylinguistObj.postag if tag[1] in content_tags
    )

    # Every token that is not a content word is treated as a function word.
    function_words = float(len(pylinguistObj.postag) - content_words)

    # Guard the division: a text made only of content words has density 0.
    return content_words / function_words if function_words else 0
def adpPronRatio(pylinguistObj):
    """Return the ratio of pronoun incidence to adposition incidence.

    Both incidences are expressed per 1000 tagged tokens, using
    ``len(pylinguistObj.postag)`` as the token total.

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence whose items
            carry the part-of-speech tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        Pronoun incidence divided by adposition incidence as a float, or
        ``0`` when there are no tagged tokens or no adpositions.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tags = pylinguistObj.postag
    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")

    # One pass is enough: a tag is a pronoun tag, "ADP", or neither.
    pron_total = 0
    adp_total = 0
    for tag in tags:
        pos = tag[1]
        if pos in pronoun_tags:
            pron_total += 1
        elif pos == "ADP":
            adp_total += 1

    try:
        per_thousand = float(len(tags)) / 1000
        pron_incidence = pron_total / per_thousand
        adp_incidence = adp_total / per_thousand
        return pron_incidence / adp_incidence
    except ZeroDivisionError:
        # Raised for an empty tag list or a text without adpositions;
        # the ratio is reported as 0 in both cases.
        return 0
def adpPronRatio(pylinguistObj):
    """Return the ratio of pronoun incidence to adposition incidence.

    Both incidences are expressed per 1000 tagged tokens, using
    ``len(pylinguistObj.postag)`` as the token total.

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence whose items
            carry the part-of-speech tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        Pronoun incidence divided by adposition incidence as a float, or
        ``0`` when there are no tagged tokens or no adpositions.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tags = pylinguistObj.postag
    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")

    # One pass is enough: a tag is a pronoun tag, "ADP", or neither.
    pron_total = 0
    adp_total = 0
    for tag in tags:
        pos = tag[1]
        if pos in pronoun_tags:
            pron_total += 1
        elif pos == "ADP":
            adp_total += 1

    try:
        per_thousand = float(len(tags)) / 1000
        pron_incidence = pron_total / per_thousand
        adp_incidence = adp_total / per_thousand
        return pron_incidence / adp_incidence
    except ZeroDivisionError:
        # Raised for an empty tag list or a text without adpositions;
        # the ratio is reported as 0 in both cases.
        return 0
def calcAmbiquity(pylinguistObj):
    """Return the average number of ``wn.synsets`` entries per content word.

    A content word is any token whose tag is a Penn Treebank verb, noun,
    adjective or adverb tag. Synsets are looked up by the word form alone
    (no part-of-speech argument is passed to ``wn.synsets``).

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence of items with
            the word at index 0 and its tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        Total synset count divided by the number of content words as a
        float, or ``0`` when the text has no content words.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Tag meaning:
    # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    # NOTE(review): sibling metrics in this file match coarse tags such as
    # "VERB"/"NOUN"; confirm the tagger really emits Penn Treebank tags here,
    # otherwise nothing matches and the result is always 0.
    content_tags = (
        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",  # verbs
        "NN", "NNS", "NNP", "NNPS",               # nouns
        "JJ", "JJR", "JJS",                       # adjectives
        "RB", "RBR", "RBS",                       # adverbs
    )

    # Only the overall totals feed the result, so one pair of accumulators
    # replaces the per-class counters.
    word_total = 0
    meaning_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in content_tags:
            word_total += 1
            meaning_total += len(wn.synsets(tag[0]))

    if word_total > 0:
        return meaning_total / float(word_total)
    return 0
def calcAmbiquity(pylinguistObj):
    """Return the average number of ``wn.synsets`` entries per content word.

    A content word is any token whose tag is a Penn Treebank verb, noun,
    adjective or adverb tag. Synsets are looked up by the word form alone
    (no part-of-speech argument is passed to ``wn.synsets``).

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence of items with
            the word at index 0 and its tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        Total synset count divided by the number of content words as a
        float, or ``0`` when the text has no content words.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Tag meaning:
    # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    # NOTE(review): sibling metrics in this file match coarse tags such as
    # "VERB"/"NOUN"; confirm the tagger really emits Penn Treebank tags here,
    # otherwise nothing matches and the result is always 0.
    content_tags = (
        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",  # verbs
        "NN", "NNS", "NNP", "NNPS",               # nouns
        "JJ", "JJR", "JJS",                       # adjectives
        "RB", "RBR", "RBS",                       # adverbs
    )

    # Only the overall totals feed the result, so one pair of accumulators
    # replaces the per-class counters.
    word_total = 0
    meaning_total = 0
    for tag in pylinguistObj.postag:
        if tag[1] in content_tags:
            word_total += 1
            meaning_total += len(wn.synsets(tag[0]))

    if word_total > 0:
        return meaning_total / float(word_total)
    return 0
def nounIncidence(pylinguistObj):
    """Return the number of nouns per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"NOUN"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "NOUN" tag is matched; the Penn Treebank variants
    # (NN, NNS, NNP, NNPS) were matched by an earlier, disabled condition.
    noun_total = sum(1 for tag in pylinguistObj.postag if tag[1] == "NOUN")

    try:
        return noun_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def nounIncidence(pylinguistObj):
    """Return the number of nouns per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"NOUN"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "NOUN" tag is matched; the Penn Treebank variants
    # (NN, NNS, NNP, NNPS) were matched by an earlier, disabled condition.
    noun_total = sum(1 for tag in pylinguistObj.postag if tag[1] == "NOUN")

    try:
        return noun_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def verbIncidence(pylinguistObj):
    """Return the number of verbs per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"VERB"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "VERB" tag is matched; the Penn Treebank variants
    # (VB, VBD, VBG, VBN, VBP, VBZ) were matched by an earlier, disabled
    # condition.
    verb_total = sum(1 for tag in pylinguistObj.postag if tag[1] == "VERB")

    try:
        return verb_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def functionalIncidence(pylinguistObj):
    """Return the number of function words per 1000 words.

    A function word is any token tagged ``DET``, ``ADP``, ``PRON``,
    ``CONJ`` or ``IN``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The function-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    functional_tags = ("DET", "ADP", "PRON", "CONJ", "IN")
    count = sum(
        1 for tag in pylinguistObj.postag if tag[1] in functional_tags
    )

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def contentIncidence(pylinguistObj):
    """Return the number of content words per 1000 words.

    A content word is any token tagged ``VERB``, ``NOUN``, ``ADJ`` or
    ``ADV``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The content-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in content_tags)

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def pronIncidence(pylinguistObj):
    """Return the number of pronouns per 1000 tagged tokens.

    A pronoun is any token tagged ``PRON``, ``PRO-KS`` or ``PRO-KS-REL``.

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence whose items
            carry the part-of-speech tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        The pronoun count divided by ``len(postag) / 1000`` as a float, or
        ``0`` when there are no tagged tokens.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tags = pylinguistObj.postag
    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    pron_total = sum(1 for tag in tags if tag[1] in pronoun_tags)

    try:
        # NOTE(review): the denominator is the tag count, whereas sibling
        # incidence metrics divide by ``word_count`` -- confirm intent.
        return pron_total / (float(len(tags)) / 1000)
    except ZeroDivisionError:
        # No tagged tokens at all; report 0 rather than fail.
        return 0
def pronIncidence(pylinguistObj):
    """Return the number of pronouns per 1000 tagged tokens.

    A pronoun is any token tagged ``PRON``, ``PRO-KS`` or ``PRO-KS-REL``.

    Args:
        pylinguistObj: Object exposing ``postag``, a sequence whose items
            carry the part-of-speech tag at index 1. When ``postag`` is
            empty it is computed with ``tools.getPosTag`` and stored back
            on the object.

    Returns:
        The pronoun count divided by ``len(postag) / 1000`` as a float, or
        ``0`` when there are no tagged tokens.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tags = pylinguistObj.postag
    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    pron_total = sum(1 for tag in tags if tag[1] in pronoun_tags)

    try:
        # NOTE(review): the denominator is the tag count, whereas sibling
        # incidence metrics divide by ``word_count`` -- confirm intent.
        return pron_total / (float(len(tags)) / 1000)
    except ZeroDivisionError:
        # No tagged tokens at all; report 0 rather than fail.
        return 0
def advIncidence(pylinguistObj):
    """Return the number of adverbs per 1000 words.

    An adverb is any token tagged ``ADV`` or ``ADV-KS``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The adverb count divided by ``word_count / 1000`` as a float, or
        ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverb_tags = ("ADV", "ADV-KS")
    adv_total = sum(1 for tag in pylinguistObj.postag if tag[1] in adverb_tags)

    try:
        return adv_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def contentIncidence(pylinguistObj):
    """Return the number of content words per 1000 words.

    A content word is any token tagged ``VERB``, ``NOUN``, ``ADJ`` or
    ``ADV``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The content-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in content_tags)

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def advIncidence(pylinguistObj):
    """Return the number of adverbs per 1000 words.

    An adverb is any token tagged ``ADV`` or ``ADV-KS``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The adverb count divided by ``word_count / 1000`` as a float, or
        ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverb_tags = ("ADV", "ADV-KS")
    adv_total = sum(1 for tag in pylinguistObj.postag if tag[1] in adverb_tags)

    try:
        return adv_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def verbIncidence(pylinguistObj):
    """Return the number of verbs per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"VERB"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "VERB" tag is matched; the Penn Treebank variants
    # (VB, VBD, VBG, VBN, VBP, VBZ) were matched by an earlier, disabled
    # condition.
    verb_total = sum(1 for tag in pylinguistObj.postag if tag[1] == "VERB")

    try:
        return verb_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def functionalIncidence(pylinguistObj):
    """Return the number of function words per 1000 words.

    A function word is any token tagged ``DET``, ``ADP``, ``PRON``,
    ``CONJ`` or ``IN``.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The function-word count divided by ``word_count / 1000`` as a
        float, or ``0`` when ``word_count`` is zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    functional_tags = ("DET", "ADP", "PRON", "CONJ", "IN")
    count = sum(
        1 for tag in pylinguistObj.postag if tag[1] in functional_tags
    )

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def adjectiveIncidence(pylinguistObj):
    """Return the number of adjectives per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"ADJ"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "ADJ" tag is matched; the Penn Treebank variants
    # (JJ, JJR, JJS) were matched by an earlier, disabled condition.
    adjective_total = sum(
        1 for tag in pylinguistObj.postag if tag[1] == "ADJ"
    )

    try:
        return adjective_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def adjectiveIncidence(pylinguistObj):
    """Return the number of adjectives per 1000 words.

    Args:
        pylinguistObj: Object exposing ``postag`` (a sequence whose items
            carry the part-of-speech tag at index 1) and ``word_count``.
            When ``postag`` is empty it is computed with
            ``tools.getPosTag`` and stored back on the object.

    Returns:
        The count of tokens tagged ``"ADJ"`` divided by
        ``word_count / 1000`` as a float, or ``0`` when ``word_count`` is
        zero.
    """
    if not pylinguistObj.postag:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    # Only the coarse "ADJ" tag is matched; the Penn Treebank variants
    # (JJ, JJR, JJS) were matched by an earlier, disabled condition.
    adjective_total = sum(
        1 for tag in pylinguistObj.postag if tag[1] == "ADJ"
    )

    try:
        return adjective_total / (float(pylinguistObj.word_count) / 1000)
    except ZeroDivisionError:
        # A text with no words has no meaningful incidence; report 0.
        return 0
def text(self, text):
    """Load ``text`` into this object and compute its descriptive counts.

    The cached ``types``, ``tokens`` and ``postag`` lists are reset first.
    Tokens, POS tags and the descriptive counts are then recomputed in a
    fixed order, since every helper is handed ``self``.

    Args:
        text: The raw text to analyse; stored as ``self.text``.

    Returns:
        ``self``, so that calls can be chained.
    """
    # Reset cached analysis results before anything is recomputed.
    self.types = []
    self.tokens = []
    self.postag = []

    # NOTE(review): this stores the string under the same attribute name as
    # this method -- confirm callers only read ``.text`` after loading.
    self.text = text  # a ``.decode('utf-8', 'ignore')`` step is disabled here

    self.tokens = tools.getTokens(self)
    # Tagging is done up front (marked as an optimization in the original).
    self.postag = tools.getPosTag(self)

    # Descriptive counts.
    self.tokenized_sentences = descriptive.tokenized_sentences(self)
    self.word_count = descriptive.word_count(self)
    self.sentence_count = descriptive.sentence_count(self)
    self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)

    # Disabled metrics, left out with the original remark
    # "Doesn't work, for some reason":
    #   descriptive.word_length, descriptive.syllable_count,
    #   descriptive.avg_syllables_per_word
    return self
def text(self, text):
    """Load ``text`` into this object and compute its descriptive counts.

    The cached ``types``, ``tokens`` and ``postag`` lists are reset first.
    Tokens, POS tags and the descriptive counts are then recomputed in a
    fixed order, since every helper is handed ``self``.

    Args:
        text: The raw text to analyse; stored as ``self.text``.

    Returns:
        ``self``, so that calls can be chained.
    """
    # Reset cached analysis results before anything is recomputed.
    self.types = []
    self.tokens = []
    self.postag = []

    # NOTE(review): this stores the string under the same attribute name as
    # this method -- confirm callers only read ``.text`` after loading.
    self.text = text  # a ``.decode('utf-8', 'ignore')`` step is disabled here

    self.tokens = tools.getTokens(self)
    # Tagging is done up front (marked as an optimization in the original).
    self.postag = tools.getPosTag(self)

    # Descriptive counts.
    self.tokenized_sentences = descriptive.tokenized_sentences(self)
    self.word_count = descriptive.word_count(self)
    self.sentence_count = descriptive.sentence_count(self)
    self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)

    # Disabled metrics, left out with the original remark
    # "Doesn't work, for some reason":
    #   descriptive.word_length, descriptive.syllable_count,
    #   descriptive.avg_syllables_per_word
    return self