def on_success(self, data):
    sentiment(Data, data['text'], negative, positive, twitter)
    # Save the timestamp alongside the new positive/negative/neutral data.
    timestamp = twitter.get_user_timeline(screen_name='@realDonaldTrump')
    Data[5] = timestamp[0]['created_at']
    update = open(dataPath, "w")
    update.write(str(Data))
    update.close()
def drive():
    mentions = api.mentions_timeline(retrieve_last_seen_id(FILE))
    for mention in reversed(mentions):
        if mention.in_reply_to_status_id is not None:
            print('@mention used replying to a tweet')
            print(mention.id)
            print(mention.text)
            test = api.get_status(mention.in_reply_to_status_id)
            print(test.text)
            senti = s.sentiment(test.text)
            review = 'positive' if senti[0] == 'pos' else 'negative'
            api.update_status('@' + mention.author.screen_name + ' analysis: ' + review +
                              "\nand variability: " + str(senti[1]),
                              in_reply_to_status_id=mention.id)
        else:
            print('@mention used without replying to any tweet.')
            print(mention.id)
            print(mention.text)
            api.update_status('@' + mention.author.screen_name +
                              " please tweet in reply to another tweet.",
                              in_reply_to_status_id=mention.id)
        time.sleep(5)
        store_last_seen_id(mention.id, FILE)
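
# drive() leans on two helpers that never appear in the snippet above:
# retrieve_last_seen_id and store_last_seen_id. A minimal file-backed sketch,
# assuming FILE is the path of a plain-text file holding a single tweet ID
# (only the names come from the snippet; the bodies here are an assumption):
def retrieve_last_seen_id(file_name):
    # Read back the ID of the last mention that was already answered.
    with open(file_name, 'r') as f:
        return int(f.read().strip())

def store_last_seen_id(last_seen_id, file_name):
    # Persist the newest mention ID so a restart does not re-reply to old tweets.
    with open(file_name, 'w') as f:
        f.write(str(last_seen_id))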
def process_news():
    s = open('online_khabar.txt', 'r')
    data = s.read()
    d = data.split('\n')
    cal = []
    news = News.objects(tested="False").first()
    # The actual detection happens here.
    senti = sentiment(news.news_title)
    total_s = []
    counter = 0
    if 'Sidha Kura' in news.news_title:
        news.update(tested="True", cause="News looks irrelevant", similarlity="0.5")
    else:
        for i in range(len(d) - 1):
            cal.append(news.news_title)
            cal.append(d[counter])
            # Check whether the scraped headline also appears on a known trusted site.
            s = SequenceMatcher(None, cal[0], cal[1]).ratio()
            total_s.append(s)
            counter += 1
            cal = []
        similarlity = max(total_s) * 100
        print(similarlity)
        news.update(tested="True", cause="News not in trusted site", similarlity=str(similarlity))
def calculate_accuracy(pos_data_set, neg_data_set):
    data = open('data_sets/test_data.txt', 'r').read()
    lines = data.split('\n')
    sentiment_score = 0
    for line in lines:
        if line == "":
            print("")
        else:
            end = line.find(":")
            tweet = line[:end]
            score = line[len(line) - 1:]
            is_pos = sentiment.sentiment(tweet, pos_data_set, neg_data_set)
            if is_pos == "positive":
                is_pos = "Y"
            else:
                is_pos = "N"
            if is_pos == score:
                sentiment_score += 1
    size = len(lines) - 1
    final_score = sentiment_score / size
    return final_score
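
# calculate_accuracy never documents the layout of test_data.txt. Judging from
# the slicing above, each line is the tweet text, then a colon, then a trailing
# Y/N gold label. A sketch with one hypothetical line, mirroring the parsing:
line = "loved the new update, works great:Y"  # hypothetical test line
end = line.find(":")
tweet = line[:end]            # "loved the new update, works great"
label = line[len(line) - 1:]  # "Y" -- the last character is the gold label
print(tweet, label)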
def polarize():
    verses = Verse.query.all()
    for v in verses:
        v.polarity = sentiment(v.scripture_text)
        print("%6.8f %s" % (v.polarity, v.scripture_text))
    db.session.commit()
    return "Done"
def classify_sentiment(sentence):
    # Collect one vote per method so a majority decides the label.
    negative = []
    positive = []
    # Dictionary-based method.
    polarity = sentiment.sentiment(sentence)
    # print "dictionary method-> " + polarity
    if polarity == POSITIVE:
        positive.append(POSITIVE)
    elif polarity == NEGATIVE:
        negative.append(NEGATIVE)
    non_alphanumeric_cleaning = re.sub(ur"[^\w\d'\s]+", "", sentence)
    li = ['naive_bayes', 'svm']
    for model in li:
        try:
            filepath = os.path.join(MODEL_PATH, str(model) + PKL)
            # Bail out if the model has not been trained yet.
            if not os.path.exists(filepath):
                return NO_MODEL_EXIST
            prediction = predict_sentiment(non_alphanumeric_cleaning, filepath)
            if prediction[LABEL] == NEGATIVE:
                negative.append(NEGATIVE)
            elif prediction[LABEL] == POSITIVE:
                positive.append(POSITIVE)
        except Exception as e:
            logging.exception(e.message)
    # Majority vote: two or more agreeing methods win, otherwise neutral.
    if len(negative) > 1:
        return NEGATIVE
    elif len(positive) > 1:
        return POSITIVE
    else:
        return NEUTRAL
def twitter_data_to_json():
    tweets = []
    list_dicts = []
    skippy = 0
    with open("twitterdata/jetblue_twitter.json", "r", encoding="utf-8") as f:
        load = f.read()
        tweets = json.loads(load)  # list of tweet dicts with "review" and "date" keys
    for tweet in tweets:
        try:
            sent_mag = sentiment(tweet["review"])
        except Exception as e:
            print(e)
            skippy += 1
            continue
        # Store the (sentiment, magnitude) pair alongside the tweet date.
        list_dicts.append({
            "date": tweet["date"],
            "sentiment": sent_mag[0],
            "magnitude": sent_mag[1]
        })
        count = len(list_dicts)
        if count % 20 == 0:
            print(f"collected {count} reviews!")
    with open("more_twitter_data.json", "w+", encoding="utf-8") as f:
        f.write(to_json_string(list_dicts))
    print(list_dicts)
    print(skippy)
    return list_dicts
def score_each_tweet(tweet_file, sent_lib):
    with open(tweet_file) as tf:
        for line in tf:
            tweet = json.loads(line, 'utf-8')
            if 'text' in tweet.keys():
                score = sentiment(tweet['text'], sent_lib)
                print score
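
# The sent_lib argument suggests a lexicon-driven scorer. A minimal sketch of
# what sentiment(text, sent_lib) might look like, assuming sent_lib maps words
# to numeric valences (the mapping shape is an assumption, not the original
# implementation):
def sentiment(text, sent_lib):
    # Sum the valence of every known word; unknown words contribute 0.
    # sent_lib is assumed to look like {"good": 3, "terrible": -4}.
    return sum(sent_lib.get(word, 0) for word in text.lower().split())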
def home(number):
    videoId = number
    comments = CE.commentExtract(videoId)
    if comments == "nocomment":
        return "nocomment"
    psent, nsent = CE.sentiment(comments)
    result = FS.fancySentiment(comments, videoId)
    return jsonify(positive=str(psent), negative=str(nsent))
def get_sentiment():
    message = request.args.get('m')
    return Response(
        response=json.dumps({'sentiment': sentiment(message)}),
        status=200,
        mimetype='application/json',
    )
def youtube():
    lyrics = session.attributes['lyric line']
    playFromYoutube(lyrics)
    time.sleep(10)
    if sentiment():
        msg = "What's up, do you not like this song?"
    else:
        msg = "I'm glad you liked the song!"
    return question(msg)
def chat(user_inp, *args):
    while True:
        # Get user input.
        inp = user_inp
        if not inp:
            return "Please say something!"
        # Instantiate a sentiment object.
        s = sentiment(inp)
        # Determine whether the input is not a sentiment.
        if s.isNotSentiment():
            # Run every sentence with different synonym combinations until one is recognized.
            sentence_list = synonym_sentences(user_inp)
            for inp in sentence_list:
                # results holds the predicted values of the tags in correspondence with the user's input.
                results = model.predict([l.bag_of_words(inp, words)])[0]
                # Grab the highest result and store its index.
                results_index = numpy.argmax(results)
                # Grab the tag belonging to the highest result.
                global tag
                tag = labels[results_index]
                # Uncomment the lines below to see the probability of each tag and the best match.
                # print(results)
                # print(tag)
                # If the best probability clears 0.8, answer; otherwise fall through
                # to the "didn't understand" responses.
                if results[results_index] > 0.8:
                    for t in data["intents"]:
                        for word in twit_tags:
                            if tag == word:
                                return twt.choose(word)
                        for word in wk_tags:
                            if tag == word:
                                return wk.choosedef(tag, inp)
                        if t['tag'] == tag:
                            responses = t['responses']
                            return random.choice(responses)
            global others
            others = [
                "I didn't quite understand",
                "I failed to understand what you were trying to say!",
                "Come again?",
                "Could you please repeat that for me?",
                "What language is that?"
            ]
            return random.choice(others)
        else:
            # Determine the sentiment value and return an appropriate response.
            sent = s.sentiment_analysis()
            return s.sentimentNumber(sent)
def analyze():
    url = flask.request.args.get('url')
    title, text, err = scrape(url)
    title_sent = sentiment(title)
    text_sent = sentiment(text)
    res = {
        'title': title,
        'sentiment': {
            'title': {
                'polarity': title_sent.polarity,
                'subjectivity': title_sent.subjectivity
            },
            'text': {
                'polarity': text_sent.polarity,
                'subjectivity': text_sent.subjectivity
            }
        }
    }
    return flask.jsonify(res)
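
# The .polarity/.subjectivity attributes read above match what TextBlob exposes.
# A minimal sketch of a compatible sentiment() helper, assuming TextBlob is the
# backing library (the snippet itself never says which one it uses):
from textblob import TextBlob

def sentiment(text):
    # TextBlob's .sentiment is a namedtuple with polarity in [-1, 1] and
    # subjectivity in [0, 1], matching the attributes the endpoint reads.
    return TextBlob(text).sentiment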
def corrected(dictionary):
    corrected_reference_list = []
    with open(corrected_txt, 'r') as r:
        for line in r:
            corrected_reference_list.append(line)
    corrected_test_set = {}
    for item in corrected_reference_list:
        corrected_test_set[item] = sentiment.sentiment(item, dictionary)
    return corrected_reference_list, corrected_test_set
def keyword_search():
    cursor = tweepy.Cursor(api.search, q='Trudeau', tweet_mode="extended").items(20)
    out = ""
    for i in cursor:
        if "RT" not in i.full_text:
            out = "User " + str(i.user.screen_name) + " says:\n\n\"" + str(i.full_text) + "\"\n\n"
            s = sentiment(i.full_text)
            sent = s.sentiment_analysis()
            out += str(s.sentimentNumber(sent))
    return out
def on_data(self, data):
    all_data = json.loads(data)
    tweet = all_data["text"]
    # Score the tweet with the sentiment() function from the sentiment module.
    sentiment_value, confidence = s.sentiment(tweet)
    if confidence * 100 >= 80:
        output = open("Output/twitter-out.txt", "a")
        output.write(sentiment_value)
        output.write('\n')
        output.close()
    return True
def on_data(self, data):
    # print(data)
    try:
        d = json.loads(data)
        category, confidence = s.sentiment(d['text'])
        if confidence >= 80:
            with open(self.file_name, 'a', encoding='utf-8') as f:
                f.write(category)
                f.write('\n')
    except BaseException as e:
        print('The error status is:', str(e))
    return True
def analyze():
    url = urllib.parse.unquote(flask.request.args.get('url'))
    print("REQUEST URL ARG PARSED ", url)
    title, text = scrape(url)
    title_sent = sentiment(title)
    text_sent = sentiment(text)
    print(text_sent, title_sent)
    res = {
        'title': title,
        'sentiment': {
            'title': {
                'polarity': title_sent.polarity,
                'subjectivity': title_sent.subjectivity
            },
            'text': {
                'polarity': text_sent.polarity,
                'subjectivity': text_sent.subjectivity
            }
        }
    }
    return flask.jsonify(res)
def sentiment_analysis(request):
    if request.method != 'POST' or 'text' not in request.POST:
        return HttpResponseBadRequest()
    text = request.POST['text'].encode('ascii', 'xmlcharrefreplace')
    sentiment_, features = sentiment.sentiment(text)
    features = sorted(features, key=lambda f: f[1])
    return HttpResponse(
        simplejson.dumps({
            'sentiment': '%0.2f' % sentiment_,
            'features': '|'.join(unicode(f) for f in features)
        }),
        mimetype='application/json')
def on_data(self, data):
    all_data = json.loads(data)
    tweet = all_data['text']
    attitude, confidence = sent.sentiment(tweet)
    print tweet
    print attitude, confidence
    # if confidence * 100 >= 80:
    #     output = open("twitter-out.txt", "a")
    #     output.write(attitude)
    #     output.write('\n')
    #     output.close()
    return True
def find(keyword):
    extremes = {"min": "", "max": ""}
    SENTIMENTS = []
    low = 1
    high = -1
    for tweet in query_tweets(keyword, 10):
        sent = sentiment.sentiment(tweet.text)
        if sent < low:
            low = sent
            extremes["min"] = tweet.text
        elif sent > high:
            high = sent
            extremes["max"] = tweet.text
        SENTIMENTS.append(sent)
    return int(1000 * sum(SENTIMENTS) / len(SENTIMENTS)), extremes
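
# A quick usage sketch for find(); the keyword is hypothetical, and the
# returned score is just the mean tweet sentiment scaled by 1000:
score, extremes = find("bitcoin")
print("mean sentiment x1000:", score)
print("most negative tweet:", extremes["min"])
print("most positive tweet:", extremes["max"])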
def on_data(self, data):
    all_data = json.loads(data)
    tweet = all_data["text"]
    sentiment_value, confidence = s.sentiment(tweet)
    print(tweet, sentiment_value, confidence)
    if confidence * 100 >= 80:
        output = open("twitter-out.txt", "a")
        output.write(sentiment_value)
        output.write('\n')
        output.close()
    return True
def get_data(url, limit):
    pText = ""
    nText = ""
    positive = 0
    negative = 0
    neutral = 0
    reviewLength = 0
    getCSV(url, limit)
    with open("Dataset/data.csv", mode="r") as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            temp = json.loads(sentiment(row["content"]))
            if temp["errors"]:
                continue
            pScore = float(temp["documents"][0]["confidenceScores"]["positive"])
            nScore = float(temp["documents"][0]["confidenceScores"]["negative"])
            if pScore > 0.5:
                pText = pText + row["content"]
                positive += 1
            elif nScore > 0.5:
                nText = nText + row["content"]
                negative += 1
            else:
                neutral += 1
            reviewLength += 1
            # if cn == 20:
            #     break
    # print(pText)
    result = dict()
    nSummarization = json.loads(summarization(nText))
    pSummarization = json.loads(summarization(pText))
    if "snippets" in nSummarization:
        result["negative"] = nSummarization["snippets"]
    else:
        result["negative"] = ["No negative reviews to show."]
    if "snippets" in pSummarization:
        result["positive"] = pSummarization["snippets"]
    else:
        result["positive"] = ["No positive reviews to show."]
    result["pScore"] = round((positive / reviewLength) * 100, 2)
    result["nScore"] = round((negative / reviewLength) * 100, 2)
    result["neScore"] = round((neutral / reviewLength) * 100, 2)
    return result
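
# The lookups above imply that sentiment() returns a JSON string shaped like an
# Azure Text Analytics v3 sentiment response. A hedged sketch of the shape being
# assumed (field names inferred from the code, not from any docs shipped with it):
example_response = """
{
  "errors": [],
  "documents": [
    {"confidenceScores": {"positive": 0.92, "negative": 0.03, "neutral": 0.05}}
  ]
}
"""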
def on_data(self, data):
    try:
        all_data = json.loads(data)
        tweet = all_data["text"]
        review, confi = s.sentiment(tweet)
        print(tweet, review, confi)
        if confi >= 80:
            file = open("tweets.txt", "a")
            file.write(review)
            file.write('\n')
            file.close()
        return True
    except Exception:
        return True
def find(keyword):
    extremes = {"min": "", "max": ""}
    low = 1
    high = -1
    SENTIMENTS = []
    gen = api.search_comments(q=keyword, limit=500)
    for c in gen:
        sent = sentiment.sentiment(c.body)
        if sent < low:
            low = sent
            extremes["min"] = c.body
        elif sent > high:
            high = sent
            extremes["max"] = c.body
        SENTIMENTS.append(sent)
    return int(1000 * sum(SENTIMENTS) / len(SENTIMENTS)), extremes
def trip_advisor_data():
    list_dicts = []
    with open("trip_advisor_reviews_JBLU.json", "r", encoding="utf-8") as f:
        dataset = json.load(f)
    for data in dataset:
        sent_mag = sentiment(data["review"])
        list_dicts.append({
            "date": data["date"],
            "sentiment": sent_mag[0],
            "magnitude": sent_mag[1],
        })
    with open("jetblue_tripadvisor_sent.json", "w", encoding="utf-8") as f:
        f.write(to_json_string(list_dicts))
    print(list_dicts)
def on_data(self, data):
    # Load all the data from the stream payload.
    allData = json.loads(data)
    tweet = allData["text"]
    # The classifier returns a sentiment label and a confidence level for the tweet.
    sentimentValue, confidenceLevel = s.sentiment(tweet)
    # Print live tweets with their sentiment value and confidence level.
    print(tweet, sentimentValue, confidenceLevel)
    if confidenceLevel * 100 >= 90:
        output = open("twitterSentiments.txt", "a")
        output.write(sentimentValue)
        output.write('\n')
        output.close()
    return True
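
# Several of these on_data handlers are tweepy StreamListener methods. A minimal
# sketch of how such a listener is typically wired up with the pre-v4 tweepy
# Stream API these snippets appear to target (credentials and the track term
# are placeholders):
from tweepy import OAuthHandler, Stream
from tweepy.streaming import StreamListener

auth = OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")        # placeholder keys
auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")  # placeholder keys
stream = Stream(auth, listener())    # listener defines on_data as shown above
stream.filter(track=["some topic"])  # hypothetical track term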
def yelp_data_to_json():
    airline = "american"
    with open(f"{airline}_yelp.json", "r", encoding="utf-8") as f:
        load = f.read()
        dataset = json.loads(load)
    list_dicts = []
    count = 0
    for data in dataset:
        sent_mag = sentiment(data["review"])
        list_dicts.append({
            "date": data["date"],
            "sentiment": sent_mag[0],
            "magnitude": sent_mag[1]
        })
        count = len(list_dicts)
        if count % 20 == 0:
            print(f"collected {count} reviews!")
    with open(f"{airline}_yelp_sent.json", "w", encoding="utf-8") as f:
        f.write(to_json_string(list_dicts))
    print(list_dicts)
def twitflick():
    # Look up some new movies.
    print("looking up titles")
    movie_titles = movie.top_box_office_titles()
    # Find some tweets about those movies.
    print("finding relevant tweets")
    movie_tweets = imap(twitter.fuzzy_find, movie_titles)
    movie_tweet_text = [[tweet.text for tweet in tweets] for tweets in movie_tweets]
    print movie_tweet_text
    # Find the average sentiment of those tweets.
    print("analyzing tweets")
    movie_sentiments = ([sentiment.sentiment(tweet) for tweet in tweets]
                        for tweets in movie_tweet_text)
    avg_movie_sentiments = imap(avg, movie_sentiments)
    # Remap the sentiments to a movie rating.
    movie_ratings = imap(sentiment_to_rating, avg_movie_sentiments)
    movie_title_ratings = zip(movie_titles, movie_ratings)
    print movie_title_ratings
def sentiment_three(text):
    num = 0
    pos_words = 0
    neg_words = 0
    # Read the word lists once instead of once per word.
    poslines = open('words/positive.txt').read().splitlines()
    neglines = open('words/negative.txt').read().splitlines()
    for word in text.split():
        for line in poslines:
            if word == line:
                num += 1
                pos_words += 1
        for line in neglines:
            if word == line:
                num -= 1
                neg_words += 1
    score = math.log10(pos_words + 0.5) - math.log10(neg_words + 0.5)
    is_pos = sentiment.sentiment(text, 'data_sets/positive-50kb.txt',
                                 'data_sets/negative-50kb.txt')
    # Force the score's sign to agree with the classifier's verdict.
    if is_pos == False and score > 0:
        score = score * -1
    elif is_pos == True and score < 0:
        score = score * -1
    return is_pos, score
def test(test_set, dictionary):
    test = {}
    for key in test_set.keys():
        test[key] = sentiment.sentiment(key, dictionary)
    return test
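
# A small usage sketch for test(), assuming test_set is keyed by the sentences
# to score and dictionary is the lexicon the surrounding snippets pass around
# (both values here are hypothetical):
dictionary = {"great": 3, "friendly": 2}                 # hypothetical lexicon
test_set = {"great service and friendly staff": "pos"}   # hypothetical gold data
predictions = test(test_set, dictionary)
for sentence, predicted in predictions.items():
    print(sentence, "->", predicted)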
def amazon(amazon, keyword):
    if keyword in azCache:
        return azCache[keyword]
    prodASIN = BeautifulSoup(amazon.ItemSearch(Keywords=keyword, SearchIndex="All",
                                               AssociateTag="Random")).item.asin.string
    url = BeautifulSoup(amazon.ItemLookup(ItemId=prodASIN, IdType="ASIN",
                                          ResponseGroup="Reviews",
                                          AssociateTag="Random")).iframeurl.string
    soup = BeautifulSoup(connect(url))
    try:
        url = soup.find(class_="crIFrameNumCustReviews").a['href']
        soup = BeautifulSoup(connect(url))
    except:
        pass
    comments = soup.findAll(text=lambda text: isinstance(text, Comment))
    print url
    v = []
    for k in comments:
        if "BOUNDARY" not in k:
            continue
        try:
            v.append(k.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling)
        except:
            pass
    ans = {}
    ans["data"] = []
    for k in v:
        try:
            wt = int(k.find_all('div')[0].get_text().encode('ascii', 'ignore').strip().split()[0])
            nm = k.find_all('div')[2].find('a').string.encode('ascii', 'ignore').strip()
            location = k.find_all('div')[2].get_text().split("(")[1].split(")")[0]
            for tag in k.find_all():
                tag.decompose()
            review = k.get_text().strip()
        except:
            continue
        temp = {}
        s = sentiment.sentiment(review, keyword)
        temp["text"] = review
        temp["name"] = nm
        temp["location"] = location
        r = connect("http://dev.virtualearth.net/REST/v1/Locations?query=" +
                    urllib.quote(location) +
                    "&output=json&key=Ar31XPc8UKrmZMDPsEVNjdAcz4yyfLtrqAkSKrNnd-RQVpFUqz4xZH1UqqTOraQI")
        d = json.loads(r)
        try:
            temp["lat"] = d['resourceSets'][0]['resources'][0]['point']['coordinates'][0]
        except:
            temp["lat"] = ""
        try:
            temp["long"] = d['resourceSets'][0]['resources'][0]['point']['coordinates'][1]
        except:
            temp["long"] = ""
        temp["weight"] = wt
        temp["sentiment"] = s
        temp["userid"] = s
        ans["data"].append(temp)
    data2 = sorted(ans['data'], key=lambda user: user['weight'], reverse=True)
    ans["data"] = data2
    azCache[keyword] = ans
    return ans
def chat(user_inp, *args):
    global lan
    while True:
        # Get user input.
        inp = user_inp
        tinp = inp.lower()
        d = googletrans.LANGUAGES
        if "change language: " in tinp:
            if inp[17:] not in d.values():
                return "Not a valid language"
            for k in d.keys():
                if inp[17:] == d[k]:
                    lan = k
            return "changed language"
        elif "translate this: " in tinp:
            translateWord = inp[16:]
            translateWord = translator.translate(translateWord, src="en", dest=lan)
            return translateWord.text
        elif "wiki: " in tinp:
            page_py = wiki_wiki.page(tinp[6:])
            if lan == "en":
                return page_py.summary[:1000] + "..."
            page_py_cs = page_py.langlinks[lan]
            return page_py_cs.summary[:1000] + "..."
        else:
            if not inp:
                return "Please say something!"
            # Instantiate a sentiment object.
            s = sentiment(inp)
            # Determine whether the input is not a sentiment.
            if s.isNotSentiment():
                # Run every sentence with different synonym combinations until one is recognized.
                sentence_list = synonym_sentences(user_inp)
                for inp in sentence_list:
                    # results holds the predicted values of the tags in correspondence with the user's input.
                    results = model.predict([l.bag_of_words(inp, words)])[0]
                    # Grab the highest result and store its index.
                    results_index = numpy.argmax(results)
                    # Grab the tag belonging to the highest result.
                    global tag
                    tag = labels[results_index]
                    # Uncomment the lines below to see the probability of each tag and the best match.
                    # print(results)
                    # print(tag)
                    # If the best probability clears 0.8, answer in the active language;
                    # otherwise fall through to the "didn't understand" responses.
                    if results[results_index] > 0.8:
                        for t in data["intents"]:
                            if t['tag'] == tag:
                                responses = t['responses']
                                if lan == "en":
                                    return random.choice(responses)
                                translatedword = translator.translate(random.choice(responses),
                                                                      src="en", dest=lan)
                                return translatedword.text
                                # return random.choice(responses)
                global others
                others = [
                    "I didn't quite understand",
                    "I failed to understand what you were trying to say!",
                    "Come again?",
                    "Could you please repeat that for me?",
                    "What language is that?"
                ]
                return random.choice(others)
            else:
                # Determine the sentiment value and return an appropriate response.
                sent = s.sentiment_analysis()
                return s.sentimentNumber(sent)
SUBREDDIT = "'The_Donald'"

# Spark setup.
from pyspark import SparkConf, SparkContext
conf = SparkConf().setMaster("local").setAppName("Reading Ease")
sc = SparkContext(conf=conf)

# Initialize the context that handles SQL.
from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

# Load the database into dataframes.
DATABASE_PATH = "/home/marshall/Dropbox/OneDrive/Documents/Columbia/AdvancedBigDataAnalytics/EECSE6895_Final_Project/database/src/Reddit2.db"
DATABASE_ENGINE = "jdbc:sqlite:"
Threads_df = sqlContext.read.format('jdbc').options(
    url=''.join([DATABASE_ENGINE, DATABASE_PATH]), dbtable='Threads').load()
Comments_df = sqlContext.read.format('jdbc').options(
    url=''.join([DATABASE_ENGINE, DATABASE_PATH]), dbtable='Comments').load()
Threads_df.registerTempTable("Threads")
Comments_df.registerTempTable("Comments")

# Get the threads for one subreddit.
SubThreads = sqlContext.sql("SELECT * FROM Threads WHERE subreddit={}".format(SUBREDDIT))
SubThreads.registerTempTable("SubThreads")

# Get all comment bodies from /r/The_Donald comments with more than 100 upvotes.
SubCommentBodies = sqlContext.sql(
    "SELECT body FROM Comments INNER JOIN SubThreads ON SubThreads.id=Comments.thread_id "
    "WHERE Comments.ups > 100").rdd

# Build a new RDD of analyzed values, skipping bare links.
SubCommentBodies = SubCommentBodies.filter(lambda x: not x[0].startswith('http'))
SubCommentScores = SubCommentBodies.map(lambda x: sentiment(x[0]))

# Calculate the mean score.
print SubCommentScores.mean()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import chardet
import codecs
import csv
import sentiment

with open('refefe-10000+280.csv') as f:
    content = f.read()
rows = [' '.join(r.split()) for r in content.split('NULL')]
reader = csv.reader(rows, delimiter=';')
for row in reader:
    fefetimestamp = row[2]
    comment = row[4].decode('utf-8').encode('latin-1', 'replace')
    if comment is not None:
        mood = sentiment.sentiment(comment, 'de')
        print fefetimestamp, '\t', mood, '\t', comment
from tweepy.streaming import StreamListener
import senti_final as s
import json

# consumer key, consumer secret, access token, access secret.
ckey = "ZonlGH1oPGQ970D7r2N51yr9B"
csecret = "mSp4bLwRPu0ZyoxzaLNpKR2KHbn1vHh6PY5NoGs0BkghqUF2oj"
atoken = "594524977-indnnrhEIakq4WlFGX49bdfH2gnhGud2mQ7oA9NQ"
asecret = "tAfbdQjSibNOIwbIbTZBDwCOsMqnoOimYqOQQVPCGzs2E"

class listener(StreamListener):
    def on_data(self, data):
        all_data = json.loads(data)
        tweet = all_data["text"]
        sentiment_value, confidence = s.sentiment(tweet)
        print(tweet, sentiment_value)
        if confidence * 100 >= 55:
            # Append rather than overwrite, so the stream accumulates results.
            output = open('Donald_trump.txt', 'a')
            output.write(str(sentiment_value))
            output.write(tweet)
            output.write('\n')
            output.close()
from __future__ import print_function
import sys
sys.path.append("../src")
import sentiment as s

print(s.sentiment("This movie was awesome! The acting was great, plot was wonderful, and there were pythons...so yea!"))
print(s.sentiment("This movie was utter junk. There were absolutely 0 pythons. I don't see what the point was at all. Horrible movie, 0/10"))
print(s.sentiment("I crapped myself today on the bus"))
print(s.sentiment("Does Vani Like Palash"))
print(s.sentiment("AMAZING"))
def getDirectConnections(statuses, term):
    directConnections = []
    for status in statuses:
        if term in status.user.name:
            continue
        if status.user.location:
            url = ("http://dev.virtualearth.net/REST/v1/Locations?query=" +
                   urllib.quote(status.user.location.encode('ascii', 'ignore')) +
                   "&output=json&key=Ar31XPc8UKrmZMDPsEVNjdAcz4yyfLtrqAkSKrNnd-RQVpFUqz4xZH1UqqTOraQI")
            r = st.connect(url)
            try:
                d = json.loads(r)
            except:
                d = r
            try:
                lt = d['resourceSets'][0]['resources'][0]['point']['coordinates'][0]
            except:
                lt = ""
            try:
                lng = d['resourceSets'][0]['resources'][0]['point']['coordinates'][1]
            except:
                lng = ""
            dConn = dict(text=status.text, name=status.user.name, lat=lt, long=lng,
                         location=status.user.location, userid=status.user.id_str,
                         sentiment=st.sentiment(status.text, term),
                         weight=status.user.followers_count)
            directConnections.append(dConn)
    return directConnections