Пример #1
0
    def on_status(self, status):
        """Score an incoming status with TextBlob and hand it to the store.

        A status whose text contains the marker ``'RT @'`` is ignored.
        For any other status a record with the tweet id, text, sentiment
        polarity/subjectivity, author details and the current local time
        is pushed to ``store`` and printed.
        """
        # Guard clause: nothing to do for statuses carrying the 'RT @' marker.
        if 'RT @' in status.text:
            return

        blob = TB(status.text)
        sentiment = blob.sentiment
        author = status.user

        tweet_item = {
            'id_str': status.id_str,
            'text': status.text,
            'polarity': blob.polarity,
            'subjectivity': sentiment.subjectivity,
            'username': author.screen_name,
            'name': author.name,
            'profile_image_url': author.profile_image_url,
            # Key spelling is kept as-is; consumers of the record read it.
            'recieved_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }

        store.push(tweet_item)
        print(tweet_item)
Пример #2
0
 def __init__(self, review_text, lang='en'):
     """Store the review text and cache its TextBlob-derived attributes.

     Args:
         review_text: Raw text of the review.
         lang: Accepted for interface compatibility; not used here.

     Sets ``text``, ``tb`` (the TextBlob built from the text),
     ``language``, ``subjectivity`` and ``polarity`` on the instance.
     """
     self.text = review_text
     self.tb = TB(self.text)
     # The blob only exists as ``self.tb``; the bare name ``tb`` was never
     # bound, so the lookups below used to raise NameError.
     # NOTE(review): detect_language() is deprecated/removed in recent
     # TextBlob releases - confirm against the pinned version.
     self.language = self.tb.detect_language()
     self.subjectivity = self.tb.subjectivity
     self.polarity = self.tb.polarity
Пример #3
0
 def set_sentiment(content: str, range: "list[float]") -> bool:
     """Use TextBlob to gauge whether some content is within a sentiment range.

     Args:
         content: Text to score.
         range: Two-element sequence of the form
             ``[float(lower), float(upper)]``. The overall bounds are
             Min(-1.0) and Max(1.0). The name shadows the builtin but is
             kept so keyword callers keep working.

     Example:
         Profanity tends to be somewhere between -1.0 and 0.0.

     Returns:
         True if the polarity of ``content`` lies within the given range
         (both ends inclusive), else False.
     """
     score = TB(content).sentiment.polarity
     return range[0] <= score <= range[1]
 def __init__(self, review_text, lang='en'):
     """Build a review from raw text: lowercase, expand, tokenize, score.

     ``lang`` is accepted but not used by this constructor.
     """
     self.text = review_text.lower()
     # Run the instance's expansion step by default.
     self.set_expanded()

     # One token list per sentence; a token is kept only if it is
     # alphanumeric and longer than a single character.
     sentence_tokens = []
     for sentence in sentence_tokenizer.tokenize(self.text):
         kept = []
         for token in wpt.tokenize(sentence):
             if token.isalnum() and len(token) > 1:
                 kept.append(token)
         sentence_tokens.append(kept)
     self.doc = sentence_tokens

     # TextBlob supplies the sentiment values.
     blob = TB(self.text)
     self.tb = blob
     self.subjectivity = blob.subjectivity
     self.polarity = blob.polarity
Пример #5
0
def language_detection(text):
    """Detect the language of ``text`` and translate it to English if needed.

    Args:
        text: Text to analyse.

    Returns:
        A tuple ``(processed_text, language_name)``. ``processed_text`` is
        the string form of the English translation, or of the original
        blob when the text is already English. ``language_name`` is
        ``languages[code]`` for the detected language code.
    """
    tb = TB(text)
    ln = tb.detect_language()
    # Single-string form prints the same text on Python 2 and Python 3.
    print("Language detected : %s" % (languages[ln],))

    # Start from the untranslated blob so English input has something to
    # return; previously ``tnslt`` was unbound on that path and the return
    # statement raised UnboundLocalError.
    tnslt = tb
    if ln != 'en':
        tnslt = tb.translate(from_lang=ln, to='en')
        # An unchanged result means the text could not be translated;
        # assume that the text is rubbish.
        if tnslt == tb:
            print("Gibbrish text!")
    return (str(tnslt), languages[ln])
Пример #6
0
def get_features(tweet):
    """Build the feature dictionary for a single tweet.

    Args:
        tweet: Tweet text.

    Returns:
        A dict with the tweet length, token count, one boolean per known
        1/2/3-gram (substring test on the lowercased tweet), the TextBlob
        sentiment polarity, the tokens adjacent to ``'flu'`` (when
        present), the share of ``PRP``-tagged words and the value returned
        by ``name_entities``.
    """
    tokens = ttok.tokenize(tweet)
    # Build the blob and the lowercased text once instead of per use.
    blob = TB(tweet)
    tags = blob.tags
    lowered = tweet.lower()

    features = {}
    features['Length'] = len(tweet)
    features['# of Tokens'] = len(tokens)
    for grams in (all_1gram_tokens, all_2gram_tokens, all_3gram_tokens):
        for gram in grams:
            features['Has "%s"' % gram] = (gram in lowered)
    features['Sentiment Polarity'] = blob.sentiment.polarity

    last_index = len(tokens) - 1
    for i, tok in enumerate(tokens):
        if tok == 'flu':
            if i > 0:
                features['Before "flu"'] = tokens[i - 1]
            # The old test ``i < (i - 1)`` could never be true, so the
            # following token was never recorded.
            if i < last_index:
                features['After "flu"'] = tokens[i + 1]

    pronoun_count = sum(1 for tagged in tags if tagged[1] == "PRP")
    # float() keeps this a true ratio under Python 2 as well; an empty tag
    # list yields 0.0 instead of ZeroDivisionError.
    features['% Personal Pronouns'] = (
        float(pronoun_count) / len(tags) if tags else 0.0
    )
    features['# of Named Entities'] = name_entities(tweet)
    return features
Пример #7
0
 def set_sentiment(content: str, range: "list[float]") -> bool:
     """Return whether the TextBlob polarity of ``content`` lies in ``range``.

     Args:
         content: Text to score.
         range: Two-element sequence ``[lower, upper]``; both ends are
             inclusive. The name shadows the builtin but is kept so
             keyword callers keep working.

     Returns:
         True if ``lower <= polarity <= upper``, else False.
     """
     score = TB(content).sentiment.polarity
     return range[0] <= score <= range[1]
Пример #8
0
def translate(text, originallanguage, newlanguage):
    """Translate ``text`` from ``originallanguage`` to ``newlanguage`` with TextBlob."""
    blob = TB(text)
    translated = blob.translate(from_lang=originallanguage, to=newlanguage)
    return translated
Пример #9
0
def word_score(word):
    """Return the TextBlob polarity of ``word``."""
    return TB(word).polarity
Пример #10
0
consumer_secret = 'xxxxx'

# Access token pair used for authenticated read/write access.
access_token = 'xxxx'
access_token_secret = 'xxx'

# OAuth handler that carries the credentials for the Twitter handshake.
# NOTE(review): consumer_key is expected to be defined above this section.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# API client configured to wait when the rate limit is reached.
api = tweepy.API(auth, wait_on_rate_limit=True)

# Tweets returned for the search term.
public_tweets = api.search('Pokemon')

# Echo every tweet with its TextBlob sentiment and store both in a CSV row.
with open("sentiment.csv", 'w', encoding='utf-8-sig', newline='') as tf:
    writer = csv.writer(tf)

    for tweet in public_tweets:
        print(tweet.text)
        analysis = TB(tweet.text)
        print(analysis.sentiment)
        writer.writerow([tweet.text, analysis.sentiment])
Пример #11
0
# Evaluate TextBlob subjectivity against the labelled subjectivity corpus.
n_instances = 4000
subj_docs = [(" ".join(sent), 'subj')
             for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(" ".join(sent), 'obj')
            for sent in subjectivity.sents(categories='obj')[:n_instances]]
# print() with one pre-formatted string emits the same text on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("subj: %d, obj: %d" % (len(subj_docs), len(obj_docs)))

# First 80 documents of each class for training, the next 20 for testing.
train_subj_docs = subj_docs[:80]
test_subj_docs = subj_docs[80:100]
train_obj_docs = obj_docs[:80]
test_obj_docs = obj_docs[80:100]
training_docs = train_subj_docs + train_obj_docs
testing_docs = test_subj_docs + test_obj_docs

# Document indices grouped by reference label and by predicted label.
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(testing_docs):
    refsets[label].add(i)
    # NOTE(review): ``criteria`` (the subjectivity threshold) is defined
    # outside this section.
    observed = "subj" if TB(feats).subjectivity >= criteria else "obj"
    testsets[observed].add(i)

# for subjectives
print("sub precision: %s" % (precision(refsets['subj'], testsets['subj']),))
print("sub recall: %s" % (recall(refsets['subj'], testsets['subj']),))
print("sub f-measure: %s" % (f_measure(refsets['subj'], testsets['subj']),))

# for objectives
print("obj precision: %s" % (precision(refsets['obj'], testsets['obj']),))
print("obj recall: %s" % (recall(refsets['obj'], testsets['obj']),))
print("obj f-measure: %s" % (f_measure(refsets['obj'], testsets['obj']),))
def is_label_subjective(reviewText):
    """Return True when the review's TextBlob subjectivity is at least 0.45."""
    return TB(reviewText).subjectivity >= 0.45
# In[27]:


df = pd.read_csv(
    "yelp_academic_dataset/yelp_review.csv",
    sep=",",
    parse_dates=['date'],
)


# In[ ]:


# Per-review accumulators, filled in row order.
polarity = []
subjectivity_list = []
sentiment_list = []

for row in df["text"]:
    sentences = str(row)
    blob = TB(sentences)
    score = blob.sentiment.polarity
    polarity.append(score)
    subjectivity_list.append(blob.sentiment.subjectivity)

    # Map the sign of the polarity onto a coarse label.
    if score == 0:
        X = "neutral"
    elif score < 0:
        X = "negative"
    elif score > 0:
        X = "positive"
    else:
        # None of the comparisons held: no label is recorded for this row.
        continue
    sentiment_list.append(X)

df["Polarity"] = polarity
df["sentiment_list"] = sentiment_list
Пример #14
0
 def set_sentiment(_data_obj, range):
     """Record on ``_data_obj`` whether its text's sentiment score is in ``range``.

     The first element of the TextBlob sentiment of ``_data_obj.text`` is
     compared against ``range[0]`` and ``range[1]`` (both inclusive) and
     the boolean outcome is stored in ``_data_obj.valid_sentiment_range``.
     """
     score = TB(_data_obj.text).sentiment[0]
     within_bounds = range[0] <= score <= range[1]
     _data_obj.valid_sentiment_range = within_bounds
Пример #15
0
 def get_sentences(self):
     """Return one list of (word, pos-tag) pairs per sentence of ``self.tb``."""
     tagged_sentences = []
     for sentence in self.tb.sentences:
         # Each sentence is re-wrapped from its string form before tagging.
         sentence_blob = TB(str(sentence))
         tagged_sentences.append(list(sentence_blob.tags))
     return tagged_sentences