def on_status(self, status):
    """Stream callback: score a non-retweet's sentiment and persist it.

    Skips anything containing 'RT @', builds a dict of tweet metadata plus
    TextBlob polarity/subjectivity, pushes it to `store`, and echoes it.
    """
    # Ignore retweets so each original tweet is scored only once.
    if 'RT @' in status.text:
        return

    blob = TB(status.text)
    # NOTE(review): the key spelling 'recieved_at' (sic) is preserved —
    # downstream consumers of `store` may depend on it.
    tweet_item = {
        'id_str': status.id_str,
        'text': status.text,
        'polarity': blob.polarity,
        'subjectivity': blob.sentiment.subjectivity,
        'username': status.user.screen_name,
        'name': status.user.name,
        'profile_image_url': status.user.profile_image_url,
        'recieved_at': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    store.push(tweet_item)
    print(tweet_item)
def __init__(self, review_text, lang='en'):
    """Wrap a review's text in a TextBlob and cache its sentiment scores.

    Args:
        review_text: raw review text.
        lang: declared language code; currently unused (kept for
            backward compatibility with existing callers).
    """
    self.text = review_text
    self.tb = TB(self.text)
    # BUG FIX: the original called `tb.detect_language()` on a bare name
    # `tb` that was never defined (NameError at runtime); the blob is
    # stored on the instance as `self.tb`.
    self.language = self.tb.detect_language()
    self.subjectivity = self.tb.subjectivity
    self.polarity = self.tb.polarity
def set_sentiment(content: str, range: list) -> bool:
    """Use TextBlob to gauge whether some content is within a sentiment range.

    Args:
        content: text to score.
        range: two-element sequence ``[float(lower), float(upper)]``.
            The minmax is Min(-1.0), Max(1.0). (The name shadows the
            ``range`` builtin; kept for backward compatibility.)

    Returns:
        True if the content's polarity is within the specified range
        (inclusive), else False.

    Example:
        Profanity tends to be somewhere between -1.0 and 0.0.
    """
    # FIX: annotation corrected — `range` is a 2-element sequence, not a
    # float. sentiment[0] is the polarity component of TextBlob's
    # (polarity, subjectivity) named tuple.
    score = TB(content).sentiment[0]
    return range[0] <= score <= range[1]
def __init__(self, review_text, lang='en'):
    """Lower-cased, expanded, sentence-tokenized review with sentiment scores.

    Args:
        review_text: raw review text (lower-cased on ingest).
        lang: declared language code; currently unused.
    """
    self.text = review_text.lower()
    # added expansion by default
    self.set_expanded()
    # Per-sentence token lists: keep only alphanumeric tokens longer
    # than one character.
    docs = []
    for sentence in sentence_tokenizer.tokenize(self.text):
        tokens = [tok for tok in wpt.tokenize(sentence)
                  if tok.isalnum() and len(tok) > 1]
        docs.append(tokens)
    self.doc = docs
    # TextBlob supplies the sentiment values.
    self.tb = TB(self.text)
    # self.language = self.tb.detect_language()
    self.subjectivity = self.tb.subjectivity
    self.polarity = self.tb.polarity
def language_detection(text): tb = TB(text) ln = tb.detect_language() print "Language detected :",languages[ln] # if not english translate it to english if ln!='en': tnslt = tb.translate(from_lang=ln,to='en') # if the text is same, i.e. Google is unable to translate # assume that the text is rubbish if tnslt == tb: print "Gibbrish text!" # else return the processed text return (str(tnslt),languages[ln])
def get_features(tweet):
    """Build the feature dict for a tweet: length, token count, n-gram
    presence flags, sentiment polarity, tokens surrounding 'flu',
    personal-pronoun ratio, and named-entity count.
    """
    tokens = ttok.tokenize(tweet)
    blob = TB(tweet)  # hoisted: one parse instead of two
    tags = blob.tags
    lowered = tweet.lower()  # hoisted out of the n-gram loops
    features = {}
    features['Length'] = len(tweet)
    features['# of Tokens'] = len(tokens)
    for gram in all_1gram_tokens:
        features['Has "%s"' % gram] = (gram in lowered)
    for gram in all_2gram_tokens:
        features['Has "%s"' % gram] = (gram in lowered)
    for gram in all_3gram_tokens:
        features['Has "%s"' % gram] = (gram in lowered)
    features['Sentiment Polarity'] = blob.sentiment.polarity
    for i, tok in enumerate(tokens):
        if tok == 'flu':
            if i > 0:
                features['Before "flu"'] = tokens[i - 1]
            # BUG FIX: the original tested `i < (i - 1)`, which is always
            # False, so the After-"flu" feature was never emitted. Compare
            # against the last valid index instead.
            if i < len(tokens) - 1:
                features['After "flu"'] = tokens[i + 1]
    features['% Personal Pronouns'] = len([w for w in tags if w[1] == "PRP"]) / len(tags)
    features['# of Named Entities'] = name_entities(tweet)
    return features
def set_sentiment(content: str, range: list) -> bool:
    """Return True if `content`'s TextBlob polarity lies within
    ``[range[0], range[1]]`` (inclusive); polarity spans -1.0..1.0.

    FIX: annotation corrected — `range` is a 2-element sequence, not a
    float. (The name shadows the ``range`` builtin; kept for backward
    compatibility.)
    """
    score = TB(content).sentiment[0]  # polarity component of sentiment tuple
    return range[0] <= score <= range[1]
def translate(text, originallanguage, newlanguage):
    """Translate `text` from `originallanguage` to `newlanguage` via TextBlob."""
    blob = TB(text)
    return blob.translate(from_lang=originallanguage, to=newlanguage)
def word_score(word):
    """Sentiment polarity of a single word via TextBlob (-1.0 .. 1.0)."""
    return TB(word).polarity
consumer_secret = 'xxxxx' # access tokens to gain rw access through authentication access_token = 'xxxx' access_token_secret = 'xxx' # Auth variable to handle API handshake and Auth access with twitter auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) # API connection object with limit rate api = tweepy.API(auth, wait_on_rate_limit=True) # variable to search for tweets that contain a specific term public_tweets = api.search('Pokemon') #for tweet in public_tweets: #print(tweet.text) #analysis = TB(tweet.text) #print(analysis.sentiment) with open("sentiment.csv",'w', encoding = 'utf-8-sig', newline='') as tf: writer = csv.writer(tf) for tweet in public_tweets: print(tweet.text) analysis = TB(tweet.text) print(analysis.sentiment) writer.writerow([tweet.text,analysis.sentiment])
# Evaluate TextBlob's subjectivity score as a subj/obj classifier against
# the NLTK `subjectivity` corpus. Python 2 print-statement syntax.
n_instances = 4000
# Label each corpus sentence with its gold category.
subj_docs = [(" ".join(sent), 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(" ".join(sent), 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
print "subj: %d, obj: %d" % (len(subj_docs), len(obj_docs))
# 80/20-style split per class (only the first 100 docs of each are used).
train_subj_docs = subj_docs[:80]
test_subj_docs = subj_docs[80:100]
train_obj_docs = obj_docs[:80]
test_obj_docs = obj_docs[80:100]
training_docs = train_subj_docs + train_obj_docs
testing_docs = test_subj_docs + test_obj_docs
# refsets: gold labels; testsets: predicted labels, both as index sets.
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testing_docs):
    refsets[label].add(i)
    # Predict "subj" when subjectivity meets the threshold `criteria`
    # (NOTE(review): `criteria` must be defined elsewhere in the file).
    observed = "subj" if TB(feats).subjectivity >= criteria else "obj"
    testsets[observed].add(i)
# for subjectives
print "sub precision:", precision(refsets['subj'], testsets['subj'])
print "sub recall:", recall(refsets['subj'], testsets['subj'])
print "sub f-measure:", f_measure(refsets['subj'], testsets['subj'])
# for objectives
print "obj precision:", precision(refsets['obj'], testsets['obj'])
print "obj recall:", recall(refsets['obj'], testsets['obj'])
print "obj f-measure:", f_measure(refsets['obj'], testsets['obj'])
def is_label_subjective(reviewText, threshold=0.45):
    """Return True if the review is highly subjective.

    Args:
        reviewText: review body to score with TextBlob.
        threshold: subjectivity cutoff in [0.0, 1.0]; defaults to the
            original hard-coded 0.45, so existing callers are unaffected.
    """
    # Return the comparison directly instead of `if ...: return True /
    # return False`.
    return TB(reviewText).subjectivity >= threshold
# In[27]:
# Load the Yelp reviews and derive TextBlob sentiment columns.
df = pd.read_csv("yelp_academic_dataset/yelp_review.csv", sep=",", parse_dates=['date'])

# In[ ]:
polarity = list()
subjectivity_list = list()
sentiment_list = list()
for row in df["text"]:
    blob = TB(str(row))
    p = blob.sentiment.polarity  # hoisted: polarity was read up to 4 times
    polarity.append(p)
    subjectivity_list.append(blob.sentiment.subjectivity)
    # Map polarity sign to a categorical label.
    # FIX: removed the dead `X = "..."` assignments (written, never read).
    if p == 0:
        sentiment_list.append("neutral")
    elif p < 0:
        sentiment_list.append("negative")
    elif p > 0:
        sentiment_list.append("positive")
df["Polarity"] = polarity
df["sentiment_list"] = sentiment_list
# NOTE(review): subjectivity_list is computed but never attached to df —
# presumably `df["Subjectivity"] = subjectivity_list` was intended; confirm
# before adding, as it would change the frame's schema.
def set_sentiment(_data_obj, range):
    """Score `_data_obj.text` with TextBlob and record on the object
    whether its polarity falls inside [range[0], range[1]] (inclusive).
    """
    polarity = TB(_data_obj.text).sentiment[0]
    _data_obj.valid_sentiment_range = range[0] <= polarity <= range[1]
def get_sentences(self):
    """Return a list (one entry per sentence in self.tb) of lists of
    (word, pos-tag) pairs, re-tagging each sentence with a fresh TextBlob."""
    tagged_sentences = []
    for sentence in self.tb.sentences:
        tagged_sentences.append(list(TB(str(sentence)).tags))
    return tagged_sentences