Пример #1
0
# Good reasons:
# 1) It's nicer to have an XML data set than a binary pickled file.
# 2) The classifier is domain-specific.
#    If it is trained on book reviews, it will do well for book reviews.
#    If it is trained on Twitter messages, it will do well for Twitter messages.
#    A classifier trained on Twitter messages may perform very poorly on book reviews.
#    A lexicon of adjectives and their score "should be" cross-domain.
# Bad reasons:
# 1) Intuitively, we don't trust machines and we think we can do better.

# Build the word -> score lexicon from the annotation sheet.
# Every non-empty field from the fourth column onward is parsed as a float,
# and the word in the first column is mapped to the average of those values.
sentiment = {}
for row in Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True):
    scores = [float(field) for field in row[3:] if field != ""]
    if not scores:
        continue  # No score at all for this word: keep it out of the lexicon.
    sentiment[row[0]] = avg(scores)
# Copy the score of each scored lemma onto its inflected forms.
# If parfait = +1.0, then parfaite = +1.0 and parfaites = +1.0.
for lemma, forms in Datasheet.load("adj-fr.csv"):
    inflections = forms.split(",")
    if lemma not in sentiment:
        continue  # Unscored lemma: there is nothing to inherit.
    for form in inflections:
        sentiment[form] = sentiment[lemma]


def positive(review, threshold=0.0):
    """ Returns True if the given review is positive,
        based on the average sentiment score of the adjectives in the text.
    """
    score = 0.0
    n = 0
    for w in review.replace("\n", " ").split(" "):
from pattern.en import parse
from pattern.en import pprint
from pattern.en import sentiment, polarity, subjectivity, positive
from pattern.en import wordnet, ADJECTIVE

#s = "Poland says the combination of a second wave of COVID-19 with flu season could create ""a lot of confusion"" because of their overlap in symptoms and put a heavy strain on the health care system. "
'''
for word in ("amazing", "horrible", "public"):
    print(word, sentiment(word))

print(sentiment(
    "The movie attempts to be surreal by incorporating time travel and various time paradoxes,"
    "but it's presented in such a ridiculous way it's seriously boring."))
'''
#s="<p>In a matter of weeks, the coronavirus has spiralled from a handful of cases in China to what many experts fear will become the next global pandemic. "
# Sample sentence to analyse. The adjacent string literals are concatenated
# by Python, so the phrase "a lot of confusion" carries no quotation marks.
s = "Poland says the combination of a second wave of COVID-19 with flu season could create " "a lot of confusion" " because of their overlap in symptoms and put a heavy strain on the health care system. "

# Analyse the sentence once and reuse the result for every report below.
result = sentiment(s)
print(result)
# Each assessment is a (chunk, polarity, subjectivity, label)-tuple.
# The loop variables are deliberately not named polarity/subjectivity,
# so the functions of the same name imported from pattern.en stay usable.
for chunk, chunk_polarity, chunk_subjectivity, label in result.assessments:
    print(chunk, chunk_polarity, chunk_subjectivity, label)

from pattern.metrics import avg
a = result.assessments
# Average polarity over the assessments that carry no label.
# The comprehension uses its own names so it cannot be confused with `s`.
score1 = avg([p for _chunk, p, _subj, tag in a if tag is None])
print(score1)

print(sentiment("fear"))
#print(wordnet.sentiwordnet["horrible"])
#'''
Пример #3
0
# For fine-grained analysis, 
# the return value of sentiment() has a special "assessments" property.
# Each assessment is a (chunk, polarity, subjectivity, label)-tuple,
# where chunk is a list of words (e.g., "not very good").

# The label offers additional meta-information.
# For example, its value is MOOD for emoticons:

s = "amazing... :/"
result = sentiment(s)
# A single-argument print(...) produces the same output on Python 2 and 3.
print(result)
# The loop variables avoid the names polarity/subjectivity so they cannot
# rebind same-named functions that may have been imported from pattern.en.
for chunk, chunk_polarity, chunk_subjectivity, label in result.assessments:
    # Formatted by hand: printing several values at once is not portable
    # between the Python 2 print statement and the Python 3 print function.
    print("%s %s %s %s" % (chunk, chunk_polarity, chunk_subjectivity, label))

# Observe the output.
# The average sentiment is positive because the expression contains "amazing".
# However, the smiley is slightly negative, hinting at the author's bad mood.
# He or she might be using sarcasm.
# We could work this out from the fine-grained analysis.

from pattern.metrics import avg
from pattern.en import MOOD

a = result.assessments

# The comprehensions use their own variable names: on Python 2 list
# comprehension variables leak into the enclosing scope, and reusing
# `s`, `chunk` or `label` there would overwrite the values defined above.
# Average polarity for words (assessments without a label).
score1 = avg([p for _chunk, p, _subj, tag in a if tag is None])
# Average polarity for emoticons (assessments labelled MOOD).
# Compared with == because relying on string identity is fragile.
score2 = avg([p for _chunk, p, _subj, tag in a if tag == MOOD])

if score1 > 0 and score2 < 0:
    print("...sarcasm?")
Пример #4
0
# For fine-grained analysis, 
# the return value of sentiment() has a special "assessments" property.
# Each assessment is a (chunk, polarity, subjectivity, label)-tuple,
# where chunk is a list of words (e.g., "not very good").

# The label offers additional meta-information.
# For example, its value is MOOD for emoticons:

s = "amazing... :/"
result = sentiment(s)
# print(...) with exactly one argument behaves identically on Python 2 and 3.
print(result)
# Loop variables are not called polarity/subjectivity, so same-named
# functions that may have been imported from pattern.en are not rebound.
for chunk, chunk_polarity, chunk_subjectivity, label in result.assessments:
    # Build the line explicitly; a multi-value print differs between the
    # Python 2 statement and the Python 3 function.
    print("%s %s %s %s" % (chunk, chunk_polarity, chunk_subjectivity, label))

# Observe the output.
# The average sentiment is positive because the expression contains "amazing".
# However, the smiley is slightly negative, hinting at the author's bad mood.
# He or she might be using sarcasm.
# We could work this out from the fine-grained analysis.

from pattern.metrics import avg
from pattern.en import MOOD

a = result.assessments

# Dedicated comprehension variables: on Python 2 they leak into module
# scope, so reusing `s`, `chunk` or `label` would clobber the values above.
# Average polarity for words (assessments without a label).
score1 = avg([p for _chunk, p, _subj, tag in a if tag is None])
# Average polarity for emoticons (assessments labelled MOOD);
# equality is used instead of identity to compare the label constant.
score2 = avg([p for _chunk, p, _subj, tag in a if tag == MOOD])

if score1 > 0 and score2 < 0:
    print("...sarcasm?")
 def variance(cluster):
     """Return the average distance between the cluster's centroid and its members.

     The centroid is computed once instead of once per member.
     For an empty cluster the result is avg([]) and no centroid is requested,
     exactly as when looping over zero members.
     """
     members = list(cluster)
     if not members:
         return avg([])
     center = centroid(cluster)
     return avg([distance(center, v) for v in members])
Пример #6
0
# the return value of sentiment() has a special "assessments" property.
# Each assessment is a (chunk, polarity, subjectivity, label)-tuple,
# where chunk is a list of words (e.g., "not very good").

# The label offers additional meta-information.
# For example, its value is MOOD for emoticons:

s = "amazing... :/"
print(sentiment(s))
# Loop variables are not named polarity/subjectivity so that same-named
# functions that may have been imported from pattern.en are not rebound.
for chunk, chunk_polarity, chunk_subjectivity, label in sentiment(s).assessments:
    print(chunk, chunk_polarity, chunk_subjectivity, label)

# Observe the output.
# The average sentiment is positive because the expression contains "amazing".
# However, the smiley is slightly negative, hinting at the author's bad mood.
# He or she might be using sarcasm.
# We could work this out from the fine-grained analysis.

from pattern.metrics import avg
# MOOD (upper case) is the label constant attached to emoticon assessments.
# The lower-case name `mood` in pattern.en is not this constant, so testing
# labels against it never matched and score2 was always the empty average.
from pattern.en import MOOD

a = sentiment(s).assessments

# Average polarity for words (assessments without a label).
score1 = avg([p for _chunk, p, _subj, tag in a if tag is None])
# Average polarity for emoticons (assessments labelled MOOD).
score2 = avg([p for _chunk, p, _subj, tag in a if tag == MOOD])

if score1 > 0 and score2 < 0:
    print("...sarcasm?")
Пример #7
0
from pattern.db import Datasheet
from pattern.metrics import avg

# This is just the stuff from 5-annotation.py, without the tests.
# You can bundle it in an application for predicting sentiment in French text.

# Lexicon that maps a word to its sentiment score.
sentiment = {}
for row in Datasheet.load("sentiment.csv - Sheet 1.csv", headers=True):
    # Fields from the fourth column onward are parsed as scores; blanks are skipped.
    scores = [float(cell) for cell in row[3:] if cell != ""]
    if scores:
        sentiment[row[0]] = avg(scores)

# Give every comma-separated form listed for a scored lemma the lemma's score.
for lemma, forms in Datasheet.load("adj-fr.csv"):
    inflected = forms.split(",")
    if lemma in sentiment:
        sentiment.update(dict.fromkeys(inflected, sentiment[lemma]))


def positive(review, threshold=0.0):
    """ Returns True if the given review is positive,
        based on the average sentiment score of the words in the text
        that appear in the module-level sentiment lexicon.

        The review is split on any whitespace (spaces, tabs, every newline style);
        each token is lowercased and stripped of leading/trailing ",.!?".
        A review without any known word has an average of 0.0,
        so it is only positive for a threshold below zero.
    """
    total = 0.0
    n = 0
    # str.split() without arguments also handles "\r\n", tabs and repeated
    # spaces; splitting on " " alone left tokens such as "bon\r" unmatched.
    for w in review.split():
        w = w.lower().strip(",.!?")
        if w in sentiment:
            total += sentiment[w]
            n += 1
    # (n or 1) avoids a division by zero when no known word was found.
    return total / (n or 1) > threshold