Exemplos de getPosTag em Python, exemplos de tools.getPosTag em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: disfluencies.py Projeto: vwoloszyn/pylinguistics

def contentDensity(pylinguistObj):
    """Return the ratio of content words to all other tagged tokens.

    Content words are the entries of ``pylinguistObj.postag`` tagged
    ``VERB``, ``NOUN``, ``ADJ`` or ``ADV``; every remaining entry counts as a
    function word. The tag list is filled through ``tools.getPosTag`` when it
    is still empty.

    Returns 0 when there are no function words to divide by.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tagged = pylinguistObj.postag
    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    n_content = sum(1 for entry in tagged if entry[1] in content_tags)

    # Kept as a float so the ratio below is a true division.
    n_function = float(len(tagged) - n_content)
    if not n_function:
        return 0
    return n_content / n_function

Exemplo n.º 2

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def adpPronRatio(pylinguistObj):
    """Return the pronoun incidence divided by the adposition incidence.

    Both incidences are expressed per 1000 entries of
    ``pylinguistObj.postag`` (filled through ``tools.getPosTag`` when it is
    still empty). Pronouns are entries tagged ``PRON``, ``PRO-KS`` or
    ``PRO-KS-REL``; adpositions are entries tagged ``ADP``.

    Returns 0 when there are no tagged entries or no adpositions.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    pron_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    n_pron = 0
    n_adp = 0
    # A single pass gathers both counts.
    for tag in pylinguistObj.postag:
        word_class = tag[1]
        if word_class in pron_tags:
            n_pron += 1
        elif word_class == "ADP":
            n_adp += 1

    per_thousand = float(len(pylinguistObj.postag)) / 1000
    try:
        pron_incidence = n_pron / per_thousand
        adp_incidence = n_adp / per_thousand
        return pron_incidence / adp_incidence
    except ZeroDivisionError:
        # Either nothing was tagged or there is no adposition to divide by.
        return 0

Exemplo n.º 3

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def adpPronRatio(pylinguistObj):
    """Return the ratio between pronoun and adposition incidence.

    Entries of ``pylinguistObj.postag`` tagged ``PRON``, ``PRO-KS`` or
    ``PRO-KS-REL`` are counted as pronouns and entries tagged ``ADP`` as
    adpositions. Each count is scaled to "per 1000 tagged entries" before
    the pronoun figure is divided by the adposition figure. The tag list is
    filled through ``tools.getPosTag`` when it is still empty.

    Returns 0 when nothing was tagged or when no adposition was found.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tagged = pylinguistObj.postag
    pronoun_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    pronouns = sum(1 for entry in tagged if entry[1] in pronoun_tags)
    adpositions = sum(1 for entry in tagged if entry[1] == "ADP")

    scale = float(len(tagged)) / 1000
    try:
        return (pronouns / scale) / (adpositions / scale)
    except ZeroDivisionError:
        # Raised for an empty tag list (scale == 0) or zero adpositions.
        return 0

Exemplo n.º 4

0

Exibir arquivo

Arquivo: redability.py Projeto: vwoloszyn/pylinguistics

def calcAmbiquity(pylinguistObj):
    """Return the average number of ``wn.synsets`` results per content word.

    A content word is an entry of ``pylinguistObj.postag`` whose tag is one
    of the Penn Treebank verb, noun, adjective or adverb tags listed below
    (https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html).
    ``wn`` is defined outside this function -- presumably NLTK's WordNet
    reader; confirm against the module imports. The tag list is filled
    through ``tools.getPosTag`` when it is still empty.

    Returns 0 when no content word is found.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = (
        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",  # verbs
        "NN", "NNS", "NNP", "NNPS",               # nouns
        "JJ", "JJR", "JJS",                       # adjectives
        "RB", "RBR", "RBS",                       # adverbs
    )

    # One synset count per content word, in document order.
    sense_counts = [
        len(wn.synsets(entry[0]))
        for entry in pylinguistObj.postag
        if entry[1] in content_tags
    ]
    if not sense_counts:
        return 0
    return sum(sense_counts) / float(len(sense_counts))

Exemplo n.º 5

0

Exibir arquivo

def calcAmbiquity(pylinguistObj):
    """Return the mean ``wn.synsets`` count over all content words.

    Entries of ``pylinguistObj.postag`` are treated as content words when
    their tag is a Penn Treebank verb, noun, adjective or adverb tag
    (https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html).
    ``wn`` comes from outside this function -- presumably NLTK's WordNet
    reader; confirm against the module imports. The tag list is filled
    through ``tools.getPosTag`` when it is still empty.

    Returns 0 when the text has no content word.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    verb_tags = ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")
    noun_tags = ("NN", "NNS", "NNP", "NNPS")
    adjective_tags = ("JJ", "JJR", "JJS")
    adverb_tags = ("RB", "RBR", "RBS")
    content_tags = verb_tags + noun_tags + adjective_tags + adverb_tags

    n_words = 0
    n_senses = 0
    for entry in pylinguistObj.postag:
        if entry[1] not in content_tags:
            continue
        n_words += 1
        n_senses += len(wn.synsets(entry[0]))

    if n_words > 0:
        return n_senses / float(n_words)
    return 0

Exemplo n.º 6

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def nounIncidence(pylinguistObj):
    """Return the number of nouns per 1000 words.

    Counts the entries of ``pylinguistObj.postag`` tagged ``NOUN`` (the tag
    list is filled through ``tools.getPosTag`` when it is still empty) and
    scales the count by ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_noun = sum(1 for tag in pylinguistObj.postag if tag[1] == "NOUN")

    try:
        return n_noun / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 7

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def nounIncidence(pylinguistObj):
    """Return how many nouns occur per 1000 words of the text.

    A noun is an entry of ``pylinguistObj.postag`` tagged ``NOUN``; the tag
    list is filled through ``tools.getPosTag`` when it is still empty. The
    count is divided by ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    nouns = 0
    for entry in pylinguistObj.postag:
        if entry[1] == "NOUN":
            nouns += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return nouns / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 8

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def verbIncidence(pylinguistObj):
    """Return the number of verbs per 1000 words.

    Counts the entries of ``pylinguistObj.postag`` tagged ``VERB`` (the tag
    list is filled through ``tools.getPosTag`` when it is still empty) and
    scales the count by ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_verb = sum(1 for tag in pylinguistObj.postag if tag[1] == "VERB")

    try:
        return n_verb / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 9

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def functionalIncidence(pylinguistObj):
    """Return the number of function words per 1000 words.

    Function words are the entries of ``pylinguistObj.postag`` tagged
    ``DET``, ``ADP``, ``PRON``, ``CONJ`` or ``IN``. The tag list is filled
    through ``tools.getPosTag`` when it is still empty, and the count is
    scaled by ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    function_tags = ("DET", "ADP", "PRON", "CONJ", "IN")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in function_tags)

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 10

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def contentIncidence(pylinguistObj):
    """Return the number of content words per 1000 words.

    Content words are the entries of ``pylinguistObj.postag`` tagged
    ``VERB``, ``NOUN``, ``ADJ`` or ``ADV``. The tag list is filled through
    ``tools.getPosTag`` when it is still empty, and the count is scaled by
    ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_tags = ("VERB", "NOUN", "ADJ", "ADV")
    count = sum(1 for tag in pylinguistObj.postag if tag[1] in content_tags)

    try:
        return count / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 11

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def pronIncidence(pylinguistObj):
    """Return the number of pronouns per 1000 tagged entries.

    Pronouns are the entries of ``pylinguistObj.postag`` tagged ``PRON``,
    ``PRO-KS`` or ``PRO-KS-REL``. The tag list is filled through
    ``tools.getPosTag`` when it is still empty. The denominator is the
    length of the tag list itself.

    Returns 0 when the tag list is empty.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    pron_tags = ("PRON", "PRO-KS", "PRO-KS-REL")
    n_pron = sum(1 for tag in pylinguistObj.postag if tag[1] in pron_tags)

    try:
        return n_pron / (float(len(pylinguistObj.postag)) / 1000)
    except ZeroDivisionError:
        # The tagger produced nothing, so there is no incidence to report.
        return 0

Exemplo n.º 12

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def pronIncidence(pylinguistObj):
    """Return how many pronouns occur per 1000 tagged entries.

    An entry of ``pylinguistObj.postag`` is a pronoun when its tag is
    ``PRON``, ``PRO-KS`` or ``PRO-KS-REL``; the tag list is filled through
    ``tools.getPosTag`` when it is still empty. The count is divided by the
    length of the tag list over 1000.

    Returns 0 when the tag list is empty.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    tagged = pylinguistObj.postag
    pronouns = 0
    for entry in tagged:
        if entry[1] in ("PRON", "PRO-KS", "PRO-KS-REL"):
            pronouns += 1

    try:
        return pronouns / (float(len(tagged)) / 1000)
    except ZeroDivisionError:
        # Nothing was tagged, so there is nothing to scale by.
        return 0

Exemplo n.º 13

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def advIncidence(pylinguistObj):
    """Return the number of adverbs per 1000 words.

    Adverbs are the entries of ``pylinguistObj.postag`` tagged ``ADV`` or
    ``ADV-KS``. The tag list is filled through ``tools.getPosTag`` when it
    is still empty, and the count is scaled by ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverb_tags = ("ADV", "ADV-KS")
    n_adv = sum(1 for tag in pylinguistObj.postag if tag[1] in adverb_tags)

    try:
        return n_adv / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 14

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def contentIncidence(pylinguistObj):
    """Return how many content words occur per 1000 words of the text.

    An entry of ``pylinguistObj.postag`` is a content word when its tag is
    ``VERB``, ``NOUN``, ``ADJ`` or ``ADV``; the tag list is filled through
    ``tools.getPosTag`` when it is still empty. The count is divided by
    ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    content_words = 0
    for entry in pylinguistObj.postag:
        if entry[1] in ("VERB", "NOUN", "ADJ", "ADV"):
            content_words += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return content_words / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 15

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def advIncidence(pylinguistObj):
    """Return how many adverbs occur per 1000 words of the text.

    An entry of ``pylinguistObj.postag`` is an adverb when its tag is
    ``ADV`` or ``ADV-KS``; the tag list is filled through
    ``tools.getPosTag`` when it is still empty. The count is divided by
    ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adverbs = 0
    for entry in pylinguistObj.postag:
        if entry[1] in ("ADV", "ADV-KS"):
            adverbs += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return adverbs / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 16

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def verbIncidence(pylinguistObj):
    """Return how many verbs occur per 1000 words of the text.

    A verb is an entry of ``pylinguistObj.postag`` tagged ``VERB``; the tag
    list is filled through ``tools.getPosTag`` when it is still empty. The
    count is divided by ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    verbs = 0
    for entry in pylinguistObj.postag:
        if entry[1] == "VERB":
            verbs += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return verbs / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 17

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def functionalIncidence(pylinguistObj):
    """Return how many function words occur per 1000 words of the text.

    An entry of ``pylinguistObj.postag`` is a function word when its tag is
    ``DET``, ``ADP``, ``PRON``, ``CONJ`` or ``IN``; the tag list is filled
    through ``tools.getPosTag`` when it is still empty. The count is divided
    by ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    function_words = 0
    for entry in pylinguistObj.postag:
        if entry[1] in ("DET", "ADP", "PRON", "CONJ", "IN"):
            function_words += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return function_words / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 18

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def adjectiveIncidence(pylinguistObj):
    """Return the number of adjectives per 1000 words.

    Counts the entries of ``pylinguistObj.postag`` tagged ``ADJ`` (the tag
    list is filled through ``tools.getPosTag`` when it is still empty) and
    scales the count by ``pylinguistObj.word_count``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    n_adjective = sum(1 for tag in pylinguistObj.postag if tag[1] == "ADJ")

    try:
        return n_adjective / (float(pylinguistObj.word_count) / 1000)
    except (ZeroDivisionError, TypeError, ValueError):
        # Narrow on purpose: only an unusable word count maps to 0, any
        # other failure is a real error and should surface.
        return 0

Exemplo n.º 19

0

Exibir arquivo

Arquivo: wordInformation.py Projeto: vwoloszyn/pylinguistics

def adjectiveIncidence(pylinguistObj):
    """Return how many adjectives occur per 1000 words of the text.

    An adjective is an entry of ``pylinguistObj.postag`` tagged ``ADJ``; the
    tag list is filled through ``tools.getPosTag`` when it is still empty.
    The count is divided by ``pylinguistObj.word_count / 1000``.

    Returns 0 when ``word_count`` is zero or cannot be used as a number.
    """
    if pylinguistObj.postag == []:
        pylinguistObj.postag = tools.getPosTag(pylinguistObj)

    adjectives = 0
    for entry in pylinguistObj.postag:
        if entry[1] == "ADJ":
            adjectives += 1

    try:
        thousands_of_words = float(pylinguistObj.word_count) / 1000
        return adjectives / thousands_of_words
    except (ZeroDivisionError, TypeError, ValueError):
        # Only an unusable word count is absorbed; other errors propagate.
        return 0

Exemplo n.º 20

0

Exibir arquivo

    def text(self, text):
        """Load ``text`` into this object and compute its basic statistics.

        Resets ``types``, ``tokens`` and ``postag``, stores the raw text, then
        fills in the tokens, the POS tags and the descriptive counts by
        passing this object to the ``tools`` and ``descriptive`` helpers.

        Returns ``self``.

        NOTE(review): the assignment to ``self.text`` below stores the string
        under the same name as this method, so on an ordinary instance a
        later ``obj.text(...)`` call would find the string rather than the
        method -- confirm whether each object is meant to be loaded only once.
        """
        # Start from a clean state before analysing the new text.
        self.types = []
        self.tokens = []
        self.postag = []

        self.text = text
        #.decode('utf-8','ignore')
        self.tokens = tools.getTokens(self)

        # Tag once up front; the metric functions only call tools.getPosTag
        # themselves when self.postag is still an empty list.
        self.postag = tools.getPosTag(self)

        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        #self.word_length = descriptive.word_length(self)                       #Doesn't work, for some reason
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self

Exemplo n.º 21

0

Exibir arquivo

Arquivo: Pylinguistics.py Projeto: vwoloszyn/pylinguistics

    def text(self, text):
        """Load ``text`` into this object and compute its basic statistics.

        Resets ``types``, ``tokens`` and ``postag``, stores the raw text, then
        fills in the tokens, the POS tags and the descriptive counts by
        passing this object to the ``tools`` and ``descriptive`` helpers.

        Returns ``self``.

        NOTE(review): the assignment to ``self.text`` below stores the string
        under the same name as this method, so on an ordinary instance a
        later ``obj.text(...)`` call would find the string rather than the
        method -- confirm whether each object is meant to be loaded only once.
        """
        # Start from a clean state before analysing the new text.
        self.types=[]
        self.tokens=[]
        self.postag=[]

        self.text = text
        #.decode('utf-8','ignore')
        self.tokens = tools.getTokens(self)

        # Tag once up front; the metric functions only call tools.getPosTag
        # themselves when self.postag is still an empty list.
        self.postag = tools.getPosTag(self)


        # Descriptive counts
        self.tokenized_sentences = descriptive.tokenized_sentences(self)
        self.word_count = descriptive.word_count(self)
        self.sentence_count = descriptive.sentence_count(self)
        self.avg_word_per_sentence = descriptive.avg_word_per_sentence(self)
        #self.word_length = descriptive.word_length(self)                       #Doesn't work, for some reason
        #self.syllable_count = descriptive.syllable_count(self)
        #self.avg_syllables_per_word = descriptive.avg_syllables_per_word(self)

        return self