Example #1
    def on_success(self, data):
        sentiment(Data, data['text'], negative, positive, twitter)

        # save timestamp and new data for positive, negative, neutral
        timestamp = twitter.get_user_timeline(screen_name='@realDonaldTrump')
        Data[5] = timestamp[0]['created_at']
        with open(dataPath, "w") as update:
            update.write(str(Data))
Example #2
def drive():
    mentions = api.mentions_timeline(retrieve_last_seen_id(FILE))

    for mention in reversed(mentions):
        if mention.in_reply_to_status_id is not None:
            print('@mention used replying to a tweet')
            print(mention.id)
            print(mention.text)

            test = api.get_status(mention.in_reply_to_status_id)

            print(test.text)
            senti = s.sentiment(test.text)
            review = 'positive' if senti[0] == 'pos' else 'negative'
            api.update_status('@' + mention.author.screen_name + ' analysis:' +
                              review + "\nand variability: " + str(senti[1]),
                              in_reply_to_status_id=mention.id)

        else:
            print('@Mention used without replying to any tweet.')
            print(mention.id)
            print(mention.text)
            api.update_status('@' + mention.author.screen_name +
                              " please tweet in reply to some another tweet.",
                              in_reply_to_status_id=mention.id)

        time.sleep(5)
        store_last_seen_id(mention.id, FILE)
Example #3
def process_news():

    with open('online_khabar.txt', 'r') as f:
        data = f.read()
    d = data.split('\n')
    news = News.objects(tested="False").first()
    # Actual detection is done here using AI.
    senti = sentiment(news.news_title)
    total_s = []
    if 'Sidha Kura' in news.news_title:
        news.update(tested="True",
                    cause="News looks irrelevant",
                    similarlity="0.5")
    else:
        for i in range(len(d) - 1):
            # Check whether the scraped news appears on another known, trusted site.
            ratio = SequenceMatcher(None, news.news_title, d[i]).ratio()
            total_s.append(ratio)
        similarlity = max(total_s) * 100
        print(similarlity)
        news.update(tested="True",
                    cause="News not in trusted site",
                    similarlity=str(similarlity))
Example #4
def calculate_accuracy(pos_data_set, neg_data_set):
    data = open('data_sets/test_data.txt', 'r').read()
    lines = data.split('\n')

    sentiment_score = 0

    for line in lines:
        if line == "":
            continue
        # Each line holds a tweet, a colon, and a Y/N label as its last character.
        end = line.find(":")
        tweet = line[:end]
        score = line[len(line) - 1:]

        is_pos = sentiment.sentiment(tweet, pos_data_set, neg_data_set)
        if is_pos == "positive":
            is_pos = "Y"
        else:
            is_pos = "N"
        if is_pos == score:
            sentiment_score += 1

    size = len(lines) - 1

    final_score = sentiment_score / size
    return final_score
Example #5
def polarize():
    verses = Verse.query.all()
    for v in verses:
        v.polarity = sentiment(v.scripture_text)
        print("%6.8f %s" % (v.polarity, v.scripture_text))
    db.session.commit()
    return "Done"
Example #7
def classify_sentiment(sentence):
    # create list to store multiple polarity
    negative = []
    positive = []
    # dictionary method
    polarity = sentiment.sentiment(sentence)
    # print "dictionary method->    "+polarity
    if polarity == POSITIVE:
        positive.append(POSITIVE)
    elif polarity == NEGATIVE:
        negative.append(NEGATIVE)
    non_alphanumeric_cleaning = re.sub(ur"[^\w\d'\s]+", "", sentence)
    li = ['naive_bayes', 'svm']
    for model in li:
        try:
            filepath = os.path.join(MODEL_PATH, str(model) + PKL)
            # Check whether model is trained or not
            if not os.path.exists(filepath):
                return NO_MODEL_EXIST
            prediction = predict_sentiment(non_alphanumeric_cleaning, filepath)
            if prediction[LABEL] == NEGATIVE:
                negative.append(NEGATIVE)
            elif prediction[LABEL] == POSITIVE:
                positive.append(POSITIVE)
        except Exception as e:
            logging.exception(e.message)
            pass
    if len(negative) > 1:
        return NEGATIVE
    elif len(positive) > 1:
        return POSITIVE
    else:
        return NEUTRAL
Example #8
def twitter_data_to_json():
    tweets = []
    list_dicts = []
    skippy = 0

    with open("twitterdata/jetblue_twitter.json", "r", encoding="utf-8") as f:
        load = f.read()

    tweets = json.loads(load)
    # get list of tweets
    for tweet in tweets:
        try:
            sent_mag = sentiment(tweet["review"])
        except Exception as e:
            print(e)
            skippy += 1
            continue
        # store the sentiment/magnitude pair for this tweet
        list_dicts.append({
                "date" : tweet["date"],
                "sentiment" : sent_mag[0],
                "magnitude" : sent_mag[1]
                })
        count = len(list_dicts)
        if count % 20 == 0:
            print(f"collected {count} reviews!")
        # tweet[0] is actual tweet / tweet[1] is date

    with open("more_twitter_data.json", "w+", encoding="utf-8") as f:
              f.write(to_json_string(list_dicts))


    print(list_dicts)
    print(skippy)
    return list_dicts
Example #9
def score_each_tweet(tweet_file, sent_lib):
  with open(tweet_file) as tf:
    for line in tf:
      tweet = json.loads(line)
      if 'text' in tweet.keys():
        score = sentiment(tweet['text'], sent_lib)
        print(score)
Example #10
def home(number):

    videoId = number
    comments = CE.commentExtract(videoId)
    if comments == "nocomment":
        return "nocomment"
    psent, nsent = CE.sentiment(comments)
    result = FS.fancySentiment(comments, videoId)
    return jsonify(positive=str(psent), negative=str(nsent))
Example #11
def get_sentiment():
    message = request.args.get('m')
    return Response(
        response=json.dumps({
            'sentiment': sentiment(message),
        }),
        status=200,
        mimetype='application/json',
    )
Example #12
def youtube():
    lyrics = session.attributes['lyric line']
    playFromYoutube(lyrics)
    time.sleep(10)
    if sentiment():
        msg = "Whats up, do you not like this song ?"
    else:
        msg = "I'm glad you liked the song ?"
    return question(msg)
Example #14
def chat(user_inp, *args):
    while True:
        #Get user input
        inp = user_inp
        if not inp:
            return "Please say something!"

        #instantiates sentiment object
        s = sentiment(inp)
        #determines if input is not a sentiment.
        if (s.isNotSentiment()):
            #Run every sentence with different synonym combinations till one is recognized
            sentence_list = synonym_sentences(user_inp)
            for inp in sentence_list:

                #results will hold the predicted value of the tags in correspondence with the user's input
                results = model.predict([l.bag_of_words(inp, words)])[0]
                #Grab the highest result and store it in results_index
                results_index = numpy.argmax(results)
                #Grab the tag belonging to the highest result
                global tag
                tag = labels[results_index]
                #Un-comment the code below to see the probability % of each tag that matches in results, and the tag that has the max probability.
                #print(results)
                #print(tag)

                #Check if the probability is higher than a set amount. We use 0.8 here to determine whether we want the bot to give a random
                #response or to say that it didn't understand
                if results[results_index] > 0.8:
                    for t in data["intents"]:
                        for word in twit_tags:
                            if tag == word:
                                return twt.choose(word)

                        for word in wk_tags:
                            if tag == word:
                                return wk.choosedef(tag, inp)

                        if t['tag'] == tag:
                            responses = t['responses']

                    return random.choice(responses)

            global others
            others = [
                "I didn't quite understand",
                "I failed to understand what you were trying to say!",
                "Come again?", "Could you please repeat that for me?",
                "What language is that?"
            ]
            return random.choice(others)

        else:
            #Determines sentiment value and returns appropriate response.
            sent = s.sentiment_analysis()
            return s.sentimentNumber(sent)
Example #15
def analyze():
    url = flask.request.args.get('url')
    title, text, err = scrape(url)
    title_sent = sentiment(title)
    text_sent = sentiment(text)
    res = {
        'title': title,
        'sentiment': {
            'title': {
                'polarity': title_sent.polarity,
                'subjectivity': title_sent.subjectivity
            },
            'text': {
                'polarity': text_sent.polarity,
                'subjectivity': text_sent.subjectivity
            }
        }
    }
    return flask.jsonify(res)
Example #17
def keyword_search():
    cursor = tweepy.Cursor(api.search, q='Trudeau',
                           tweet_mode="extended").items(20)
    for i in cursor:
        if "RT" not in i.full_text:
            out = "User " + str(i.user.screen_name) + " says:\n\n\"" + str(
                i.full_text) + "\"\n\n"
            s = sentiment(i.full_text)
            sent = s.sentiment_analysis()
            out += str(s.sentimentNumber(sent))
            return out
Example #18
 def on_data(self, data):
     all_data = json.loads(data)
     tweet = all_data["text"]
     #Find the Sentiment of the tweet by calling sentiment function inside sentiment file.
     sentiment_value, confidence = s.sentiment(tweet)
     if confidence * 100 >= 80:
         output = open("Output/twitter-out.txt", "a")
         output.write(sentiment_value)
         output.write('\n')
         output.close()
     return True
Example #19
def corrected(dictionary):
    corrected_reference_list = []
    with open(corrected_txt, 'r') as r:
        for line in r:
            corrected_reference_list.append(line)

    corrected_test_set = {}
    for item in corrected_reference_list:
        corrected_test_set[item] = sentiment.sentiment(item, dictionary)

    return corrected_reference_list, corrected_test_set
Example #20
 def on_data(self, data):
     #print(data)
     try:
         d = json.loads(data)
         category, confidence = s.sentiment(d['text'])
         if confidence >= 80:
             with open(self.file_name, 'a', encoding='utf-8') as f:
                 f.write(category)
                 f.write('\n')
     except BaseException as e:
         print('The error status is:', str(e))
     return True
Example #21
def analyze():
    url = urllib.parse.unquote(flask.request.args.get('url'))
    print("REQUEST URL ARG PARSED ", url)
    title, text = scrape(url)
    title_sent = sentiment(title)
    text_sent = sentiment(text)
    print(text_sent, title_sent)
    res = {
        'title': title,
        'sentiment': {
            'title': {
                'polarity': title_sent.polarity,
                'subjectivity': title_sent.subjectivity
            },
            'text': {
                'polarity': text_sent.polarity,
                'subjectivity': text_sent.subjectivity
            }
        }
    }
    return flask.jsonify(res)
Example #22
def sentiment_analysis(request):
    if request.method != 'POST' or 'text' not in request.POST:
        return HttpResponseBadRequest()
    text = request.POST['text'].encode('ascii', 'xmlcharrefreplace')
    sentiment_, features = sentiment.sentiment(text)
    features = sorted(features, key=lambda f: f[1])
    return HttpResponse(
                simplejson.dumps(
                    {'sentiment': '%0.2f' % sentiment_,
                     'features': '|'.join((unicode(f) for f in features))}
                ),
                mimetype='application/json')
Example #23
    def on_data(self, data):
        all_data = json.loads(data)

        tweet = all_data['text']
        attitude, confidence = sent.sentiment(tweet)

        print tweet
        print attitude, confidence
        # if confidence * 100 >= 80:
        #     output = open("twitter-out.txt", "a")
        #     output.write(attitude)
        #     output.write('\n')
        #     output.close()
        return True
Example #24
def find(keyword):
    extremes = {"min": "", "max": ""}
    SENTIMENTS = []
    low = 1
    high = -1
    for tweet in query_tweets(keyword, 10):
        sent = sentiment.sentiment(tweet.text)
        if sent < low:
            low = sent
            extremes["min"] = tweet.text
        # separate check (not elif) so a single tweet can update both extremes
        if sent > high:
            high = sent
            extremes["max"] = tweet.text
        SENTIMENTS.append(sent)
    return int(1000 * sum(SENTIMENTS) / len(SENTIMENTS)), extremes
Example #25
 def on_data(self, data):
     
     all_data = json.loads(data)
     
     tweet = all_data["text"]
     sentiment_value, confidence = s.sentiment(tweet)
     print(tweet, sentiment_value, confidence)
     
     if confidence*100 >= 80:
         output = open("twitter-out.txt","a")
         output.write(sentiment_value)
         output.write('\n')
         output.close()
     
     return True
Example #26
def get_data(url, limit):
    pText = ""
    nText = ""
    positive = 0
    negative = 0
    neutral = 0
    reviewLength = 0
    getCSV(url, limit)
    with open("Dataset/data.csv", mode="r") as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            temp = json.loads(sentiment(row["content"]))
            if temp["errors"]:
                continue
            pScore = float(
                temp["documents"][0]["confidenceScores"]["positive"])
            nScore = float(
                temp["documents"][0]["confidenceScores"]["negative"])
            if pScore > 0.5:
                pText = pText + row["content"]
                positive += 1
            elif nScore > 0.5:
                nText = nText + row["content"]
                negative += 1
            else:
                neutral += 1
            reviewLength += 1
            # if cn == 20:
            #     break

    # print(pText)
    result = dict()
    nSummarization = json.loads(summarization(nText))
    pSummarization = json.loads(summarization(pText))
    if "snippets" in nSummarization:
        result["negative"] = nSummarization["snippets"]
    else:
        result["negative"] = ["No negative reviews to show."]

    if "snippets" in pSummarization:
        result["positive"] = pSummarization["snippets"]
    else:
        result["positive"] = ["No positive reviews to show."]

    result["pScore"] = round((positive / reviewLength) * 100, 2)
    result["nScore"] = round((negative / reviewLength) * 100, 2)
    result["neScore"] = round((neutral / reviewLength) * 100, 2)
    return result
Example #27
 def on_data(self, data):
     try:
         all_data = json.loads(data)

         tweet = all_data["text"]
         review, confi = s.sentiment(tweet)
         print(tweet, review, confi)
         if confi >= 80:
             file = open("tweets.txt", "a")
             file.write(review)
             file.write('\n')
             file.close()

         return True
     except:
         return True
Example #28
def find(keyword):
    extremes = {"min": "", "max": ""}
    low = 1
    high = -1
    SENTIMENTS = []
    gen = api.search_comments(q=keyword, limit=500)
    for c in gen:
        sent = sentiment.sentiment(c.body)
        if sent < low:
            low = sent
            extremes["min"] = c.body
        # separate check (not elif) so a single comment can update both extremes
        if sent > high:
            high = sent
            extremes["max"] = c.body
        SENTIMENTS.append(sent)
    return int(1000 * sum(SENTIMENTS) / len(SENTIMENTS)), extremes
Example #29
def trip_advisor_data():

    list_dicts = []

    with open("trip_advisor_reviews_JBLU.json", "r", encoding="utf-8") as f:
        dataset = json.load(f)

    for data in dataset:
        sent_mag = sentiment(data["review"])
        list_dicts.append({
            "date": data["date"],
            "sentiment": sent_mag[0],
            "magnitude": sent_mag[1],
        })

    with open("jetblue_tripadvisor_sent.json", "w", encoding="utf-8") as f:
        f.write(to_json_string(list_dicts))

    print(list_dicts)
Example #30
    def on_data(self, data):

        #Loading all the data
        allData = json.loads(data)

        tweet = allData["text"]

        #Takes the data from twitter and returns classification and confidence
        sentimentValue, confidenceLevel = s.sentiment(tweet)

        #Printing live tweets including their sentiment value and confidence level
        print(tweet, sentimentValue, confidenceLevel)

        if confidenceLevel * 100 >= 90:
            output = open("twitterSentiments.txt", "a")
            output.write(sentimentValue)
            output.write('\n')
            output.close()

        return True
Example #31
def yelp_data_to_json():
    airline = "american"

    with open(f"{airline}_yelp.json", "r", encoding="utf-8") as f:
        load = f.read()
    dataset = json.loads(load)
    list_dicts = []
    count = 0
    for data in dataset:
        sent_mag = sentiment(data["review"])
        list_dicts.append({
            "date": data["date"],
            "sentiment": sent_mag[0],
            "magnitude": sent_mag[1]
        })
        count = len(list_dicts)
        if count % 20 == 0:
            print(f"collected {count} reviews!")
    with open(f"{airline}_yelp_sent.json", "w", encoding="utf-8") as f:
        f.write(to_json_string(list_dicts))

    print(list_dicts)
Example #32
def twitflick():
    # lookup some new movies
    print("looking up titles")
    movie_titles = movie.top_box_office_titles()

    # find some tweets about those movies
    print("finding relevant tweets")
    movie_tweets = imap(twitter.fuzzy_find, movie_titles)
    movie_tweet_text = [[tweet.text for tweet in tweets]
                        for tweets in movie_tweets]
    print movie_tweet_text

    # find the average sentiment of those tweets
    print("analyzing tweets")
    movie_sentiments = ([sentiment.sentiment(tweet) for tweet in tweets]
                        for tweets in movie_tweet_text)
    avg_movie_sentiments = imap(avg, movie_sentiments)

    # remap the sentiments to a movie rating
    movie_ratings = imap(sentiment_to_rating, avg_movie_sentiments)

    movie_title_ratings = zip(movie_titles, movie_ratings)
    print movie_title_ratings
Example #33
def sentiment_three(text):
    num = 0
    pos_words = 0
    neg_words = 0
    # load the word lists once rather than re-reading them for every word
    poslines = open('words/positive.txt').read().splitlines()
    neglines = open('words/negative.txt').read().splitlines()
    for word in text.split():
        for line in poslines:
            if word == line:
                num += 1
                pos_words += 1
        for line in neglines:
            if word == line:
                num -= 1
                neg_words += 1
    score = math.log10(pos_words + 0.5) - math.log10(neg_words + 0.5)
    is_pos = sentiment.sentiment(text, 'data_sets/positive-50kb.txt',
                                 'data_sets/negative-50kb.txt')

    if is_pos == False and score > 0:
        score = score * -1
    elif is_pos == True and score < 0:
        score = score * -1
    return is_pos, score
Example #34
def test(test_set, dictionary):
    test = {}
    for key in test_set.keys():
        test[key] = sentiment.sentiment(key, dictionary)

    return test
Example #35
def amazon(amazon, keyword):

    if keyword in azCache:
        return azCache[keyword]

    prodASIN = BeautifulSoup(amazon.ItemSearch(Keywords=keyword, SearchIndex="All", AssociateTag="Random")).item.asin.string

    url = BeautifulSoup(amazon.ItemLookup(ItemId=prodASIN, IdType="ASIN", ResponseGroup="Reviews", AssociateTag="Random")).iframeurl.string

    soup = BeautifulSoup(connect(url))
    try:
        url = soup.find(class_="crIFrameNumCustReviews").a['href']
        soup = BeautifulSoup(connect(url))
    except:
        pass

    comments = soup.findAll(text=lambda text: isinstance(text, Comment))
    print url
    v = []
    for k in comments:
        if "BOUNDARY" not in k:
            continue
        try:
            v.append(k.next_sibling.next_sibling.next_sibling.next_sibling.next_sibling)
        except:
            pass

    ans = {}
    ans["data"] = []
    for k in v:
        try:
            wt = int(k.find_all('div')[0].get_text().encode('ascii', 'ignore').strip().split()[0])
            nm = k.find_all('div')[2].find('a').string.encode('ascii', 'ignore').strip()
            location = k.find_all('div')[2].get_text().split("(")[1].split(")")[0]

            for tag in k.find_all():
                tag.decompose()
            review = k.get_text().strip()
        except:
            continue
        temp = {}
        s = sentiment.sentiment(review, keyword)
        temp["text"] = review
        temp["name"] = nm
        temp["location"] = location
        r = connect("http://dev.virtualearth.net/REST/v1/Locations?query=" + urllib.quote(location) + "&output=json&key=Ar31XPc8UKrmZMDPsEVNjdAcz4yyfLtrqAkSKrNnd-RQVpFUqz4xZH1UqqTOraQI")
        d = json.loads(r)
        try:
            temp["lat"] = d['resourceSets'][0]['resources'][0]['point']['coordinates'][0]
        except:
            temp["lat"] = ""

        try:
            temp["long"] = d['resourceSets'][0]['resources'][0]['point']['coordinates'][1]
        except:
            temp["long"] = ""

        temp["weight"] = wt
        temp["sentiment"] = s
        temp["userid"] = s

        ans["data"].append(temp)
    data2 = sorted(ans['data'], key=lambda user: user['weight'], reverse=True)
    ans["data"] = data2
    azCache[keyword] = ans
    return ans
Example #36
def chat(user_inp, *args):
    global lan
    while True:
        #Get user input
        inp = user_inp
        tinp = inp.lower()

        d = googletrans.LANGUAGES

        if("change language: " in tinp):
            if inp[17:] not in d.values():
                return "Not a valid language"

            for k in d.keys():
                if inp[17:] == d[k]:
                    lan = k            
            return "changed language"
    
 
        elif("translate this: " in tinp):
            translateWord = inp[16:]
            translateWord = translator.translate(translateWord,src="en",dest= lan)            
            return translateWord.text

        elif("wiki: " in tinp):
            page_py = wiki_wiki.page(tinp[6:])
            if(lan == "en"):
                return page_py.summary[:1000] + "..."
            page_py_cs = page_py.langlinks[lan]
            return page_py_cs.summary[:1000] + "..."

        else:
            if not inp:
                return "Please say something!"

            #instantiates sentiment object
            s = sentiment(inp)
            #determines if input is not a sentiment.
            if(s.isNotSentiment()):
                #Run every sentence with different synonym combinations till one is recognized
                sentence_list = synonym_sentences(user_inp)
                for inp in sentence_list:
                    
                    #results will hold the predicted value of the tags in correspondence with the user's input
                    results = model.predict([l.bag_of_words(inp, words)])[0]
                    #Grab the highest result and store it in results_index
                    results_index = numpy.argmax(results)
                    #Grab the tag belonging to the highest result
                    global tag
                    tag = labels[results_index]
                    #Un-comment the code below to see the probability % of each tag that matches in results, and the tag that has the max probability.
                    #print(results)
                    #print(tag)

                    #Check if the probability is higher than a set amount. We use 0.8 here to determine whether we want the bot to give a random
                    #response or to say that it didn't understand
                    if results[results_index] > 0.8:
                        for t in data["intents"]:
                            if t['tag'] == tag:
                                responses = t['responses']
                        if lan == "en":
                            return random.choice(responses)
                        translatedword = translator.translate(random.choice(responses),src="en",dest= lan)
                        return translatedword.text
                        #return random.choice(responses)
                global others
                others = ["I didn't quite understand", "I failed to understand what you were trying to say!", "Come again?", "Could you please repeat that for me?", "What language is that?"]
                return random.choice(others)

            else:
                #Determines sentiment value and returns appropriate response.
                sent = s.sentiment_analysis()
                return s.sentimentNumber(sent)
Example #37
SUBREDDIT = "'The_Donald'"

# Spark stuff
from pyspark import SparkConf, SparkContext
conf = SparkConf().setMaster("local").setAppName("Reading Ease")
sc = SparkContext(conf=conf)

# Load and initialize the Context to handle SQL
from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)

# Load database into dataframe
DATABASE_PATH = "/home/marshall/Dropbox/OneDrive/Documents/Columbia/AdvancedBigDataAnalytics/EECSE6895_Final_Project/database/src/Reddit2.db"
DATABASE_ENGINE = "jdbc:sqlite:"
Threads_df = sqlContext.read.format('jdbc').options(url=''.join([DATABASE_ENGINE, DATABASE_PATH]), dbtable='Threads').load()
Comments_df = sqlContext.read.format('jdbc').options(url=''.join([DATABASE_ENGINE, DATABASE_PATH]), dbtable='Comments').load()
Threads_df.registerTempTable("Threads")
Comments_df.registerTempTable("Comments")

# Get threads for one subreddit
SubThreads = sqlContext.sql("SELECT * FROM Threads WHERE subreddit={}".format(SUBREDDIT))
SubThreads.registerTempTable("SubThreads")

# Get all comment bodies from comments in /r/The_Donald with more than 100 upvotes
SubCommentBodies = sqlContext.sql("SELECT body FROM Comments INNER JOIN SubThreads ON SubThreads.id=Comments.thread_id WHERE Comments.ups > 100").rdd

# Create new RDD of analyzed values
SubCommentBodies = SubCommentBodies.filter(lambda x: not(x[0].startswith('http')))
SubCommentScores = SubCommentBodies.map(lambda x: sentiment(x[0]))
# Calculate mean
print SubCommentScores.mean()
Example #38
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import chardet
import codecs
import csv
import sentiment

with open('refefe-10000+280.csv') as f:
    content = f.read()

rows = [' '.join(r.split()) for r in content.split('NULL')]
reader = csv.reader(rows, delimiter=';')
for row in reader:
    fefetimestamp = row[2]
    comment = row[4].decode('utf-8').encode('latin-1', 'replace')
    if comment is not None:
        mood = sentiment.sentiment(comment, 'de')
        print fefetimestamp, '\t', mood, '\t', comment
Example #39
import senti_final as s
import json
#consumer key, consumer secret, access token, access secret.
ckey="ZonlGH1oPGQ970D7r2N51yr9B"
csecret="mSp4bLwRPu0ZyoxzaLNpKR2KHbn1vHh6PY5NoGs0BkghqUF2oj"
atoken="594524977-indnnrhEIakq4WlFGX49bdfH2gnhGud2mQ7oA9NQ"
asecret="tAfbdQjSibNOIwbIbTZBDwCOsMqnoOimYqOQQVPCGzs2E"

class listener(StreamListener):
        def on_data(self, data):
                all_data = json.loads(data)
                tweet = all_data["text"]
                # classify the tweet and only keep high-confidence results
                sentiment_value, confidence = s.sentiment(tweet)
                print(tweet, sentiment_value)
                if confidence * 100 >= 80:
                        output = open('Donald_trump.txt', 'a')
                        output.write(sentiment_value)
                        output.write('\n')
                        output.close()
Example #40
from __future__ import print_function
import sys
sys.path.append("../src")
import sentiment as s

print(s.sentiment("This movie was awesome! The acting was great, plot was wonderful, and there were pythons...so yea!"))
print(s.sentiment("This movie was utter junk. There were absolutely 0 pythons. I don't see what the point was at all. Horrible movie, 0/10"))
print(s.sentiment("I crapped myself today on the bus"))
print(s.sentiment("Does Vani Like Palash"))
print(s.sentiment("AMAZING"))
Example #41
def getDirectConnections(statuses, term):
    directConnections = []
    for status in statuses:
        if term in status.user.name:
            continue
        if status.user.location:    
            url = "http://dev.virtualearth.net/REST/v1/Locations?query="+urllib.quote(status.user.location.encode('ascii','ignore'))+"&output=json&key=Ar31XPc8UKrmZMDPsEVNjdAcz4yyfLtrqAkSKrNnd-RQVpFUqz4xZH1UqqTOraQI"
            r=st.connect(url)
            try:
                d = json.loads(r)
            except:
                d=r
        try:
            lt = d['resourceSets'][0]['resources'][0]['point']['coordinates'][0]
        except: lt = ""

        try:
            lng = d['resourceSets'][0]['resources'][0]['point']['coordinates'][1]
        except:
            lng = ""
        
        dConn = dict(text=status.text, name=status.user.name, lat=lt, long=lng, location=status.user.location, userid=status.user.id_str, sentiment=st.sentiment(status.text, term), weight=status.user.followers_count)
        directConnections.append(dConn)
    return directConnections