def sent_sentiment(sent, analyzer):
    # pos_threshold and neg_threshold are module-level globals
    if analyzer == 'textblob':
        from textblob import TextBlob
        testimonial = TextBlob(sent)
        polarity = testimonial.sentiment.polarity
        if polarity > pos_threshold:
            return 1   # positive
        elif polarity < neg_threshold:
            return -1  # negative
        else:
            return 0   # neutral
    elif analyzer == 'vader':
        from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
        try:
            vs = vaderSentiment(sent)
        except UnicodeError:  # retry with an explicit utf-8 encoding
            vs = vaderSentiment(sent.encode('utf8'))
        if vs['pos'] > vs['neg'] and vs['pos'] > vs['neu']:
            return 1
        elif vs['neg'] > vs['neu']:
            return -1
        else:
            return 0
    else:
        raise ValueError("Got incorrect analyzer type: %s" % analyzer)

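# A minimal sketch (not from any of the snippets in this collection) of the
# same threshold-based classification on the current vaderSentiment 3.x API,
# where the legacy module-level sentiment() function has been replaced by the
# SentimentIntensityAnalyzer class. The +/-0.05 defaults are the cutoffs
# suggested in the VADER documentation.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

_analyzer = SentimentIntensityAnalyzer()

def sent_sentiment_v3(sent, pos_threshold=0.05, neg_threshold=-0.05):
    """Classify `sent` as 1 (positive), -1 (negative) or 0 (neutral)."""
    compound = _analyzer.polarity_scores(sent)['compound']
    if compound > pos_threshold:
        return 1
    elif compound < neg_threshold:
        return -1
    return 0
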
def run_twitter_search(keyword, output_file):
    print(keyword.upper())
    counter = 0
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_keywords([str(keyword)])  # define all words we would like to search for
        # tso.set_language('en')  # we want to see English tweets only
        tso.set_include_entities(True)  # include entity information
        tso.set_geocode(45.551279, -92.586955, 530, imperial_metric=True)
        # `ts` is a module-level TwitterSearch client and `df` a module-level
        # pandas DataFrame accumulating results
        for tweet in ts.search_tweets_iterable(tso):
            counter += 1
            search_term = keyword
            username = "******" if tweet['user']['screen_name'] is None else tweet['user']['screen_name']
            text = "NONE" if tweet['text'] is None else tweet['text']
            place = "NONE" if tweet['place'] is None else tweet['place']
            if tweet['coordinates'] is not None:
                lat = tweet['coordinates']['coordinates'][1]
                lng = tweet['coordinates']['coordinates'][0]
            elif tweet['place'] is not None:
                # no exact point: average the place's bounding-box corners
                place_coordinates = tweet['place']['bounding_box']['coordinates']
                sum_lat = 0
                sum_lng = 0
                for pair in place_coordinates[0]:
                    sum_lat += pair[1]
                    sum_lng += pair[0]
                lat = sum_lat / len(place_coordinates[0])
                lng = sum_lng / len(place_coordinates[0])
                place = tweet['place']['full_name']
            else:
                lat = "NONE"
                lng = "NONE"
            location = "NONE" if tweet['user']['location'] is None else tweet['user']['location']
            created_at = "NONE" if tweet['created_at'] is None else tweet['created_at']
            description = "NONE" if tweet['user']['description'] is None else tweet['user']['description']
            verified = "NONE" if tweet['user']['verified'] is None else str(tweet['user']['verified'])
            sentiment_score = vaderSentiment(text.encode('utf-8'))
            compound_sentiment = sentiment_score['compound']
            description_sentiment = vaderSentiment(description.encode('utf-8'))['compound']
            try:
                df.loc[len(df)] = [search_term, username, text, lat, lng,
                                   location, created_at, place, description,
                                   verified, sentiment_score,
                                   compound_sentiment, description_sentiment]
                if (len(df) % 200) == 0:
                    write_to_excel(output_file, 'Sheet1', df)
                    print("_%s %s tweets/%s total" % (counter, keyword.upper(), len(df)))
            except Exception:
                write_to_excel(output_file, str(keyword), df)
            if counter == 10000:
                return
        write_to_excel(output_file, str(keyword), df)
        print("_______%s tweets saved" % (len(df)))
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)

def determineSentiment(track_lyrics):
    # VADER sentiment calculation and database storage
    try:
        track_lyrics = track_lyrics.encode('utf-8')
        sentiment = vaderSentiment(track_lyrics)
    except UnicodeEncodeError:
        track_lyrics = track_lyrics.encode('ascii', 'ignore')
        sentiment = vaderSentiment(track_lyrics)
    neg = sentiment['neg']
    neu = sentiment['neu']
    pos = sentiment['pos']
    return (neg, neu, pos)

def analyzeSentiment(sentences):
    """
    analyzeSentiment(listOfString) -> listOfJSON

    Returns a JSON object wrapped in an array that contains the number of
    positive, neutral and negative tweets, ready to be processed by chartJS.
    """
    neu = 0
    pos = 0
    neg = 0
    # decide whether each sentence carries a negative, neutral or positive sentiment
    for sentence in sentences:
        vs = vaderSentiment(sentence)
        if vs['neg'] > vs['pos']:
            neg += 1
        elif vs['pos'] > vs['neg']:
            pos += 1
        else:
            neu += 1
    # output is in this format so chartJS can use it in the web app
    return_data = [{'value': neg, 'label': 'Negative Tweets', 'color': '#D18177'},
                   {'value': neu, 'label': 'Neutral Tweets', 'color': '#9CBABA'},
                   {'value': pos, 'label': 'Positive Tweets', 'color': '#00688B'}]
    return return_data

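# A brief usage sketch for analyzeSentiment (hypothetical input; assumes the
# same legacy vaderSentiment import the snippet relies on). The return value
# is plain JSON-serializable data ready for chartJS.
import json

chart_data = analyzeSentiment(["I love it", "I hate it", "It exists"])
print(json.dumps(chart_data, indent=2))
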
def process_tweet(tweet):
    tweet = json.loads(tweet)
    tweet_text = tweet.get('text')
    trump = word_in_text('trump', tweet_text)
    clinton = word_in_text('clinton', tweet_text)
    if trump is False and clinton is False:
        return 0
    vs = vaderSentiment(tweet_text.encode('utf-8'))
    # skip tweets that are fully neutral or whose compound score is weak
    if vs.get('neu') == 1 or (-.35 < vs.get('compound') < .35):
        return 0
    # at this point the compound score is either above .35 or below -.35
    pos = vs.get('compound') > .35
    post = {
        'trump': trump,
        'clinton': clinton,
        'vader': vs,
        'positive': pos,
        'text': tweet_text
    }
    posts = db.tweets
    post_id = posts.insert_one(post).inserted_id
    count_tweets(True, 'trump')

def process_item(self, item, spider):
    # at this point item['lenArticle'] still holds the article text;
    # it is replaced by the token count further down
    try:
        item['vaderSentiment'] = vaderSentiment(item['lenArticle'])
    except Exception:
        pass
    success_count = 0
    tech_count = 0
    update_count = 0
    partner_count = 0
    contest_count = 0
    tokens = nltk.word_tokenize(item['lenArticle'])
    ## tokens = sorted(w for w in set(tokens) if len(w) > 1)
    for word in tokens[:]:  # iterate over a copy so removing items is safe
        word_lower = word.lower()
        if word_lower in self.words_to_remove:
            tokens.remove(word)
        if word_lower in self.words_to_rank_success:
            success_count += 1
        if word_lower in self.words_to_rank_tech:
            tech_count += 1
        if word_lower in self.words_to_rank_update:
            update_count += 1
        if word_lower in self.words_to_rank_partner:
            partner_count += 1
        if word_lower in self.words_to_rank_contest:
            contest_count += 1
    item['lenArticle'] = len(tokens)
    fdist = nltk.FreqDist(tokens)
    item['fdist'] = fdist.most_common(50)
    item['topic'] = {'success': success_count, 'technology': tech_count,
                     'update': update_count, 'partner': partner_count,
                     'contest': contest_count}
    return item

def bigrams_unigrams_sentiment(texts):
    all_feats = []
    for text in texts:
        profanity_count = 0
        appreciation_count = 0
        help_count = 0
        for string in appreciation:
            appreciation_count += text.lower().count(string)
        for string in profanity:
            profanity_count += text.lower().count(string)
        for string in seeking_help:
            help_count += text.lower().count(string)
        words = text.split(" ")
        bigrams = []
        for i in xrange(len(words) - 1):
            bigrams.append(words[i] + " " + words[i + 1])
        features = Counter(bigrams)
        features += Counter(words)
        vs = vaderSentiment(text)
        # print text, vs
        features['neg_sentiment'] = vs['neg']
        features['neu_sentiment'] = vs['neu']
        features['pos_sentiment'] = vs['pos']
        features['profanity_count'] = profanity_count
        features['appreciation_count'] = appreciation_count
        features['help_count'] = help_count
        features['text_length'] = len(text)
        all_feats.append(features)
    vectorizer = DictVectorizer(sparse=False)
    return vectorizer.fit_transform(all_feats)

def get_vadersentiment_polarity(text_to_analyze):
    # Here `vaderSentiment` must be the SentimentIntensityAnalyzer class
    # (e.g. `from vaderSentiment.vaderSentiment import
    # SentimentIntensityAnalyzer as vaderSentiment`), unlike the legacy
    # module-level sentiment() function used in the other snippets.
    analyzer = vaderSentiment()
    scores = analyzer.polarity_scores(text_to_analyze)
    neg = scores['neg']
    pos = scores['pos']
    polarity = pos - neg
    return polarity

def parse_dir_contents(self, response):
    # Create an instance of the item; its definition lives in items.py
    item = Website()
    # If a div with id 'maincontent' exists we are on the CSE website,
    # so the first branch can extract from it safely
    if response.xpath('//div[@id="maincontent"]') != []:
        for sel in response.xpath('//div[@id="maincontent"]'):
            item['name'] = sel.xpath('h1/text()').extract()
            temp = []
            for ptag in sel.xpath('p'):
                temp += ptag.xpath('text()').extract()
    else:
        item["name"] = response.xpath('//h1/text()').extract()
        temp = []
        for ptag in response.xpath('//p'):
            temp += ptag.xpath('text()').extract()
    item['description'] = ""
    for i in temp:
        item['description'] += i
    item['lenArticle'] = len(item['description'].split())
    # Compute the sentiment of the current article and add it to the item
    item['vaderSentiment'] = vaderSentiment(item['description'])
    # Take the first text node of the right-hand footer <p> and slice out
    # the date portion
    dateText = response.xpath('//div[@id="footer"]/p[@class="right"]/text()[1]').extract()
    date = dateText[0]
    item['date'] = date[16:]
    yield item

def avg_review_vaderSentiment(data):
    sentiment_totals = defaultdict(int)
    result_categories = set()
    output = []
    for each in data.items():
        results = each[1]['results']
        result_categories.add(results)
        reviews_list = each[1].get('reviews')
        if reviews_list:
            n_reviews = len(reviews_list)
            review_sentiments = [vaderSentiment(x[1].encode('utf8')) for x in reviews_list]
            sentiment_totals[results + '_n_reviews'] += n_reviews
            sentiment_totals[results + '_pos_sentiment'] += sum([i['pos'] for i in review_sentiments])
            sentiment_totals[results + '_neg_sentiment'] += sum([i['neg'] for i in review_sentiments])
            sentiment_totals[results + '_neu_sentiment'] += sum([i['neu'] for i in review_sentiments])
            sentiment_totals[results + '_compound_sentiment'] += sum([i['compound'] for i in review_sentiments])
    for result in result_categories:
        n = float(sentiment_totals[result + '_n_reviews'])
        t = (result,
             sentiment_totals[result + '_n_reviews'],
             float(sentiment_totals[result + '_pos_sentiment']) / n,
             float(sentiment_totals[result + '_neg_sentiment']) / n,
             float(sentiment_totals[result + '_compound_sentiment']) / n)
        output.append(t)
    return output

def max_review_vaderSentiment(data, n, min_length):
    vader_reviews = {}
    for each in data.items():
        reviews_list = each[1].get('reviews')
        if reviews_list:
            review_sentiments = [
                (x[1].encode('utf8'), vaderSentiment(x[1].encode('utf8')))
                for x in reviews_list
                if len(nltk.word_tokenize(x[1])) >= min_length
            ]
            for r in review_sentiments:
                vader_reviews[r[0]] = (r[1]['neg'], r[1]['pos'])
    # note: the [0:n + 1] slice returns n + 1 reviews, not n
    top_neg = sorted(vader_reviews.items(), key=lambda x: x[1][0], reverse=True)[0:n + 1]
    top_pos = sorted(vader_reviews.items(), key=lambda x: x[1][1], reverse=True)[0:n + 1]
    return top_pos, top_neg

def VSPolarity(self, corpus):
    self.result = []
    for sentence in corpus:
        vs = vaderSentiment(sentence)
        aList = [vs["neg"], vs["neu"], vs["pos"]]
        self.result.append(aList)
    return np.array(self.result)

def discretized_vader(text, cutoffs=[-0.150, 0.150]):
    '''Give a discretized VADER sentiment score:
    1 = negative, 2 = neutral, 3 = positive.'''
    score = vaderSentiment(text)['compound']
    if score < cutoffs[0]:
        return 1
    elif score > cutoffs[1]:
        return 3
    return 2

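# Hypothetical usage of discretized_vader: map the discrete classes back to
# labels (the exact class depends on the installed VADER lexicon version).
labels = {1: 'negative', 2: 'neutral', 3: 'positive'}
print(labels[discretized_vader("This movie was great!")])  # expected: 'positive'
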
def getSentiment():
    # Tweet_Corpus, cur and db are module-level globals
    for Tweet_text in Tweet_Corpus:
        vs = str(vaderSentiment(Tweet_text))
        # print Tweet_text
        # print str(vs)
        cur.execute(
            "INSERT INTO sentiment (tweet_text, sentiment) VALUES (%s, %s)",
            (Tweet_text, vs))
        cur.fetchall()  # note: fetching after an INSERT is a no-op at best
        db.commit()

def calculate_Sentiment(item):
    values_list = item[1]
    # open the results file once in append mode instead of reopening per review
    with open("/path/to/sentiment_results_file", "a") as f:
        for review in values_list:
            sentiment_score = vaderSentiment(review[2])['compound']
            f.write("\n Product ID: " + str(item[0]) +
                    " Review: " + str(review[2]) +
                    " User Rating: " + str(review[1]) +
                    " Sentiment Score: " + str(sentiment_score))

def facebookReport(id):
    fb = facebook.GraphAPI(ACCESS_TOKEN)
    d_posts = fb.get_connections(id, 'posts')
    for i in range(10):
        post = d_posts['data'][i]
        postText = post['message']
        vs = vaderSentiment(postText.encode('utf-8'))
        words = postText.split()
        lexicalDiversity = len(set(words)) * 1.0 / len(words)
        print 'Post Text:', removeUnicode(postText)
        print 'Lexical Diversity:', lexicalDiversity
        print 'Sentiment:', vs['compound']
        print '================================================\n'

def get_tweets(q, count):
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET, CONSUMER_KEY, CONSUMER_SECRET)
    tw = twitter.Twitter(auth=auth)
    print "==============================" * 2
    print "\n\t" + '\033[95m' + str(count) + " Tweets with " + q + '\033[0m' + "\n"
    print "==============================" * 2
    tweets = tw.search.tweets(q=q, count=count, lang='en')
    texts = []
    counter = 0
    neg_tweet = 0
    pos_tweet = 0
    retweet_counter = 0
    total_senti = 0
    for status in tweets["statuses"]:
        counter += 1
        texts.append(status["text"])
        words = []
        for w in removeUnicode(status['text']).split():
            words.append(w)
        vs = vaderSentiment(status["text"].encode('utf-8'))
        if vs['compound'] > 0:
            pos_tweet += 1
        if vs['compound'] < 0:
            neg_tweet += 1
        print "\n" + "=" * 10 + "Tweet # " + str(counter) + "=" * 10 + "\n"
        print removeUnicode(status['text'])
        retweet_counter += status["retweet_count"]
        print "\nRetweet count: %d\n" % (status["retweet_count"])
        print "Lexical Diversity: ", 1.0 * len(set(words)) / len(words)
        print "\nSentiment: " + str(vs['compound'])
        total_senti += vs['compound']
    print "==============================" * 2
    print "\nReport over %d tweets: \n" % counter
    print "Total number of retweets: %d\n" % retweet_counter
    print "Negative tweets: " + str(neg_tweet)
    print "\nPositive tweets: " + str(pos_tweet)
    print "\nOverall sentiment: " + str(total_senti / counter)
    words = []
    for t in texts:
        words += [w for w in removeUnicode(t).split()]
    lexical_diversity = 1.0 * len(set(words)) / len(words)
    print "\nOverall lexical diversity: ", lexical_diversity

def func(std, emojificationParam, text, outFile="out.txt"):
    # accept either a .txt file path or a raw string
    if text[-4:] == '.txt':
        with open(text, "r") as f:
            content = f.read()
    else:
        content = text
    output = open(outFile, "w")
    sentences = re.split(r"[?\.!]", content)
    for sentence in sentences:
        vs = vaderSentiment(sentence)
        text = word_tokenize(sentence)
        pos_tags = pos_tag(text)
        myEpd = epd.epd(0, std)
        # sample candidate emoji positions
        emojipos = []
        for sample in range(0, int(np.random.rand() * emojificationParam * len(text))):
            a = myEpd.getPosition(text)
            if a is not None:
                emojipos += [int(a)]
        # keep at most 3 occurrences of each position with an allowed POS tag
        righttags = []
        for position in emojipos:
            if getTag(pos_tags, position) == True:
                if righttags.count(position) < 3:
                    righttags += [position]
        score = vs["compound"] * 50
        final_emojis = []
        emojis = []
        # collect emojis whose value is within +/-5 of the scaled sentiment
        for k, v in emoji.items():  # index to list of unicode values
            for val in v:
                if val >= score - 5 and val <= score + 5:
                    emojis += [k]
        for index in righttags:
            sample = int(np.floor(np.random.rand() * len(emojis)))
            choice = emojis[sample]
            final_emojis += [(index, choice)]
        text = [unicode(t, "utf-8") for t in text]
        for index, e in final_emojis:
            if index == 0:
                # at sentence start, place the emoji before or after at random
                samp = np.random.rand()
                if samp > .5:
                    text[index] = text[index] + e
                else:
                    text[index] = e + text[index]
            else:
                text[index] = text[index] + e
        s = " ".join(text)
        if len(s) > 2:
            output.write((s + unicode(". ")).encode("utf-8"))
            print (s + unicode(". ")).encode("utf-8")
    output.close()

def process(self, tup):
    id = tup.values[0]
    title = tup.values[1].encode('ascii', 'ignore')
    # the cleaned tweet text emitted by the ParseTweet bolt
    tweet = tup.values[2].encode('ascii', 'ignore')

    # Get the sentiment values for the tweet. vs is a dictionary of the form
    # {'neg': value1, 'neu': value2, 'pos': value3, 'compound': value4}
    vs = vaderSentiment(tweet)

    # Get the existing running count and running sentiment values for this
    # tmdbId, then add one to the counter and fold in the new values from vs.
    # `cur` and `conn` are module-level; a local connection would look like:
    # conn = psycopg2.connect(database="filmzz", user="******", password="******", host="localhost", port="5432")
    # cur = conn.cursor()
    currentCount = 0
    cur.execute("SELECT runningCount from TweetStatistic WHERE tmdbId=%s", [id])
    result = cur.fetchone()
    if result is not None:
        currentCount = result[0]
    # build [neg, neu, pos, compound] explicitly; relying on vs.items()
    # ordering is not safe across Python versions
    currentSentiment = [vs['neg'], vs['neu'], vs['pos'], vs['compound']]
    if currentCount == 0:
        # Insert a new row with the counter, movie title and tmdbId
        currentCount = currentCount + 1
        cur.execute("INSERT INTO TweetStatistic (tmdbId, title, runningCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment) VALUES (%s,%s,%s,%s,%s,%s,%s)",
                    [id, title, currentCount, currentSentiment[0], currentSentiment[1], currentSentiment[2], currentSentiment[3]])
    else:
        cur.execute("SELECT runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment from TweetStatistic WHERE tmdbId=%s", [id])
        result = cur.fetchone()
        currentCount = currentCount + 1
        if result is not None:
            runningNegativeSentiment = currentSentiment[0] + result[0]
            runningNeutralSentiment = currentSentiment[1] + result[1]
            runningPositiveSentiment = currentSentiment[2] + result[2]
            runningCompoundSentiment = currentSentiment[3] + result[3]
            self.log('Updating the statistics for id:%s' % (id))
            # Update the running statistics
            cur.execute("UPDATE TweetStatistic SET runningCount = %s, runningNegativeSentiment = %s, runningNeutralSentiment = %s, runningPositiveSentiment=%s, runningCompoundSentiment = %s WHERE tmdbId = %s",
                        (currentCount, runningNegativeSentiment, runningNeutralSentiment, runningPositiveSentiment, runningCompoundSentiment, id))
    conn.commit()

def print_statistics(tweets):
    for item in tweets:
        print "======================"
        print item["text"]
        print "Favorite Count: ", item["favorite_count"]
        print "Retweets: ", item["retweet_count"]
        print "Lexical Diversity", lexical_diversity(item["text"])
        print "Sentiment Analysis:", vaderSentiment(
            item["text"].encode('utf-8'))['compound']  # not 100% sure this'll work
        if item["user"]:
            print "Username: ", item["user"]["screen_name"].encode('utf-8')
            print "Description: ", item["user"]["description"].encode('utf-8')
            print "Location: ", item["user"]["location"].encode('utf-8')

def run_vader_sentiment_analyzer(batch, sentiments, cutoffs=[-0.50, 0.50]):
    '''
    One of the NLP tools used to evaluate sentiment. VADER outputs a
    compound score between -1 and +1, so we must make our own wrapper to
    categorize the scores the way we see fit. Cutoffs for negative,
    neutral, and positive can be optionally specified.
    '''
    for text in batch:
        vs = vaderSentiment(text)
        score = vs['compound']
        if score < cutoffs[0]:
            sentiments['negative'] += 1
        elif score > cutoffs[1]:
            sentiments['positive'] += 1
        else:
            sentiments['neutral'] += 1

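# Hypothetical usage of run_vader_sentiment_analyzer: the caller owns the
# tally dict, which must be pre-initialized with all three keys.
sentiments = {'negative': 0, 'neutral': 0, 'positive': 0}
run_vader_sentiment_analyzer(["I love it!", "I hate it!", "It exists."], sentiments)
print(sentiments)  # e.g. {'negative': 1, 'neutral': 1, 'positive': 1}
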
def sentiment_analysis(tweet):
    """Returns the sentiment of the tweet from -1 to 1"""
    vs = vaderSentiment(tweet)
    pos = vs["pos"]
    neg = vs["neg"]
    neu = vs["neu"]
    # find the highest-scoring sentiment
    highest = max(pos, neg, neu)
    if highest == neg:
        # a dominant negative score is reported as a negative value
        highest *= -1
    elif highest == neu:
        # neutral maps to 0.0; a dominant positive score is returned as-is
        highest = 0.0
    return highest

def compute_vader_sentiments(posts):
    cnt = 0
    sentiments = []
    for post in posts:
        cnt += 1
        data = {}
        data["post"] = post
        vs = vaderSentiment(post)
        # print vs
        if vs["neg"] >= vs["neu"] and vs["neg"] >= vs["pos"]:
            data["label"] = "neg"
        elif vs["pos"] >= vs["neu"] and vs["pos"] >= vs["neg"]:
            data["label"] = "pos"
        else:
            data["label"] = "neutral"
        sentiments.append(data)
    print "sentiments computed for %d posts" % cnt
    return sentiments

def sentiment_classifier(course_code):
    # delete all previous classifications
    Classification.objects.filter(classifier='VaderSentiment').delete()
    # get messages
    sm_objs = LearningRecord.objects.filter(course_code=course_code)
    for sm_obj in sm_objs:
        message = sm_obj.message.encode('utf-8', 'replace')
        sentiment = "Neutral"
        vs = vaderSentiment(message)
        # print vs, message
        # print "\n\t" + str(vs)
        if vs['compound'] > 0:
            sentiment = "Positive"
        elif vs['compound'] < 0:
            sentiment = "Negative"
        # save the classification
        classification_obj = Classification(xapistatement=sm_obj,
                                            classification=sentiment,
                                            classifier='VaderSentiment')
        classification_obj.save()

def get_sentiment(item, source):
    '''Get the overall sentiment of the video's description'''
    if source == 'twitter':
        description = item['tweet']['orig_text']
    elif source == 'facebook':
        if 'description' in item:
            description = item['description']
        else:
            description = ''
    else:
        description = item['items'][0]['snippet']['description']
    description = description.encode('utf-8').strip()
    sent = vaderSentiment(description)
    item.setdefault("sentiment", sent['compound'])
    return item

def on_data(self, data):
    fil = open("meu.txt", "a")  # opened but never written to
    stop = set(nltk.corpus.stopwords.words('english'))
    stop.update(['http', 'https', 'rt'])
    tweet = json.loads(data)
    if 'text' in tweet:
        texto = tweet['text'].encode('utf-8', 'ignore')
        self.numstop -= 1
        texto = self.user + '-' + texto
        self.producer.send(self.instanttopic, texto)
        saveTweet('pos', tweet, self.user)
        saveLocation('pos', tweet, self.user)
        vs = vaderSentiment(str(texto))
        contagemneg = vs['neg']
        contagempos = vs['pos']
        contagemspam = vs['neu']
        filo = open("vader.txt", 'a')  # opened but never written to
    if self.numstop == 0:
        return False
    return True

def baseline(tweets, emoji_maps):
    """Takes cleaned tweets. Returns list of baseline predictions"""
    assigned_emojis = list()
    for tweet in tweets:
        emoji_assigned = False
        # first, try to match any word in the tweet against the emoji tag lists
        for word in tweet.split():
            for emoji_map in emoji_maps:
                tags = emoji_map["tags"]
                for tag in tags.split():
                    if word == tag and emoji_assigned == False:
                        uni = emoji_map["unicode"]
                        print(tag)
                        print(uni)
                        assigned_emojis.append(uni.lower())
                        emoji_assigned = True
        # otherwise fall back to a sentiment-based default emoji
        if emoji_assigned == False:
            vs = vaderSentiment(tweet)
            positive = vs["pos"]
            negative = vs["neg"]
            neutral = vs["neu"]
            compound = vs["compound"]
            if positive > negative:
                uni = u"\U0001f60a"  # smiling face; TODO: how were these emojis picked?
            else:
                uni = u"\U0001f622"  # crying face
            assigned_emojis.append(uni)
            emoji_assigned = True
    return assigned_emojis

def process(self, tup):
    twt = tup.values[0].encode("utf-8", "replace")
    # remove links
    twt = re.sub(r'https?:\S+', "", twt)
    # remove @ symbol
    twt = re.sub(r'@', "", twt)
    # split hashtags by camel case
    pattern = re.compile(r'#\S+')
    for match in re.findall(pattern, twt):
        split = match.replace("#", "")
        split = re.sub(r"([A-Z])", " \\1", split)
        split = re.sub(r"([a-zA-Z])([0-9])", "\\1 \\2", split)
        split = split[1:] + "."
        twt = twt.replace(match, split)
    # run VADER over the normalized text
    vader = vaderSentiment(twt)['compound']
    # emit results for every matching queryId
    for queryId in tup.values[1]:
        storm.emit([queryId, vader, tup.values[0]])

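# A standalone demo (hypothetical input) of the hashtag normalization used in
# process() above, with the same regexes. Note the implicit assumption that a
# hashtag starts with an uppercase letter: split[1:] strips the space inserted
# before the first capital, so "#lowerCase" would lose its first character.
import re

def split_hashtags(twt):
    pattern = re.compile(r'#\S+')
    for match in re.findall(pattern, twt):
        split = match.replace("#", "")
        split = re.sub(r"([A-Z])", " \\1", split)               # space before capitals
        split = re.sub(r"([a-zA-Z])([0-9])", "\\1 \\2", split)  # space before digits
        split = split[1:] + "."
        twt = twt.replace(match, split)
    return twt

print(split_hashtags("Loved it #BestMovie2016"))  # -> Loved it Best Movie 2016.
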
# Search twitter for @CocaCola's most recent tweets
q = 'from:CocaCola'
count = 25
tweets = tw.search.tweets(q=q, count=count, lang='en', result_type='recent')
texts = []

print 'Sentiment Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Sentiment analysis
for status in tweets['statuses']:
    texts.append(status['text'])
    print 'Tweet:'
    print '\t' + status['text'].encode('utf-8')
    vs = vaderSentiment(status['text'].encode('utf-8'))
    print 'Sentiment analysis:'
    print '\t' + str(vs['compound'])
    print '--------------------------------------------------------------------------'

print '\nLexical Analysis for @CocaCola\'s Most Recent Tweets:'
print '--------------------------------------------------------------------------'

# Lexical analysis
for text in texts:
    print 'Tweet:'
    print '\t' + text.encode('utf-8')
    words = []
    for w in text.split():
        words.append(w)
    print 'Lexical diversity:'

def analyze(path, output_path):
    k = open(path, 'r')
    o = open(output_path, 'w')
    # Fox News article boilerplate that should never be scored
    boilerplate = "This material may not be published, broadcast, rewritten, or redistributed."
    for url in k:
        print url
        split_lines = url.split('/')
        date = extract_time_fox_news(url)
        if date != 0:
            result = []
            result.append(url.strip('\n').strip('\r'))
            if date >= 20150101 and ('news' in split_lines or 'politics' in split_lines):
                total_paragraph = 0
                print date
                result.append(str(date))
                sentences = []
                page = requests.get(url.strip("\n"))
                soup = BeautifulSoup(page.content)
                total_score = 0
                temp = soup.find_all('article', {"itemprop": "articleBody"})
                header = soup.find_all('em')
                first_p = soup.find_all('br')
                # lead paragraphs inside <em> tags
                for x in header:
                    start = []
                    for y in x.contents:
                        if exclude_text not in str(y.string):
                            start.append(str(y.string))
                    texts = "".join(start)
                    if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                        print texts
                        sentences.append(texts)
                        zen = vaderSentiment(unicode(texts))
                        print zen
                        total_paragraph += 1
                        if zen['neu'] < float(1):
                            if zen['pos'] > zen['neg']:
                                total_score += zen['pos']
                            else:
                                total_score -= zen['neg']
                # paragraphs that directly follow <br> tags
                for x in first_p:
                    if str(x.next_sibling) != " " and exclude_text not in str(x.next_sibling):
                        texts = str(x.next_sibling)
                        if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']
                # paragraphs inside the article body
                for x in temp:
                    for y in x.contents:
                        texts = unicode(y.string).strip('\n')
                        if texts != " " and texts != "\n" and texts != "" and texts != "\r" and boilerplate not in texts and texts != 'None':
                            print texts
                            sentences.append(texts)
                            zen = vaderSentiment(unicode(texts))
                            print zen
                            total_paragraph += 1
                            if zen['neu'] < float(1):
                                if zen['pos'] > zen['neg']:
                                    total_score += zen['pos']
                                else:
                                    total_score -= zen['neg']
                # RAKE keyword scores decide whether the article is about
                # Trump (T), Clinton (H) or both (B)
                rake_object = RAKE.Rake('stop-word-list.txt')
                keywords = rake_object.run("\n".join(sentences))
                trump_prob = 0
                hillary_prob = 0
                for x in keywords:
                    if 'trump' in x[0] or 'donald' in x[0]:
                        trump_prob += int(x[1])
                    if 'hillary' in x[0] or 'clinton' in x[0]:
                        hillary_prob += int(x[1])
                print url.strip("\n")
                print "Trump total ", trump_prob
                print "Hillary total ", hillary_prob
                if abs(trump_prob - hillary_prob) <= 2:
                    result.append("B")
                elif trump_prob > hillary_prob:
                    result.append("T")
                else:
                    result.append("H")
                if total_paragraph != 0:
                    print "Total: ", total_score / total_paragraph
                    print "Total paragraph: ", total_paragraph
                    print "\n"
                    result.append(str(total_score / total_paragraph))
                    o.writelines(", ".join(result) + "\n")

from datetime import datetime

from docopt import docopt
from twitterframework import TwitterAPI
from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
import win_unicode_console

if __name__ == '__main__':
    arguments = docopt(__doc__, version='Twitter Stream DB 1.0')
    twitter_fw = TwitterAPI(arguments['<file>'])
    twitter_api = twitter_fw.getAPI()
    keylist = [key for key in arguments['<TwitterKeyword>']]
    rows = []
    win_unicode_console.enable()  # allows printing unicode to the Windows console
    for tweet in twitter_api.GetStreamFilter(follow=None, track=keylist,
                                             locations=None, delimited=None,
                                             stall_warnings=None):
        t_lang = tweet['lang']
        t_text = tweet['text']
        vs = vaderSentiment(t_text)
        for i, searchtext in enumerate(keylist):
            if searchtext in t_text:
                print("'{0}','{1}','{2}','{3}','{4}','{5}'".format(
                    str(datetime.now()), str(t_text), searchtext,
                    str(t_lang), vs['pos'], vs['neg']))

"Warren Beatty has never been so entertaining.", "I won't say that the movie is astounding and I wouldn't claim that \ the movie is too banal either.", "I like to hate Michael Bay films, but I couldn't fault this one", "It's one thing to watch an Uwe Boll film, but another thing entirely \ to pay for it", "The movie was too good", "This movie was actually neither that funny, nor super witty.", "This movie doesn't care about cleverness, wit or any other kind of \ intelligent humor.", "Those who find ugly meanings in beautiful things are corrupt without \ being charming.", "There are slow and repetitive parts, BUT it has just enough spice to \ keep it interesting.", "The script is not fantastic, but the acting is decent and the cinematography \ is EXCELLENT!", "Roger Dodger is one of the most compelling variations on this theme.", "Roger Dodger is one of the least compelling variations on this theme.", "Roger Dodger is at least compelling as a variation on the theme.", "they fall in love with the product", "but then it breaks", "usually around the time the 90 day warranty expires", "the twin towers collapsed today", "However, Mr. Carter solemnly argues, his client carried out the kidnapping \ under orders and in the ''least offensive way possible.''" ] sentences.extend(tricky_sentences) #sid = SentimentIntensityAnalyzer() for sentence in sentences: print sentence, ss = vaderSentiment(sentence) print "\t" + str(ss) print ""
def get_sentiment(sentence):
    # pick the highest-scoring entry from VADER's result dict; return its
    # label ('neg'/'neu'/'pos'), or the raw compound value in the unlikely
    # case that 'compound' scores highest
    sentiments = max(vaderSentiment(sentence).iteritems(),
                     key=operator.itemgetter(1))
    if sentiments[0] != "compound":
        return sentiments[0]
    else:
        return sentiments[1]

def getSentiment(text):
    vs = vaderSentiment(text.encode('utf-8'))
    return vs['compound']

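# A hedged compatibility shim (an assumption, not part of the snippets above):
# newer vaderSentiment releases expose SentimentIntensityAnalyzer instead of
# the module-level sentiment() function these snippets import. This keeps the
# vaderSentiment(text) call sites working on either version.
try:
    from vaderSentiment.vaderSentiment import sentiment as vaderSentiment
except ImportError:
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

    _vader = SentimentIntensityAnalyzer()

    def vaderSentiment(text):
        # the class-based API expects str, not bytes
        if isinstance(text, bytes):
            text = text.decode('utf-8')
        return _vader.polarity_scores(text)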