Example #1
 def calculateGrowthAndSentiment(self):
     
     mintime = min(map(lambda x: x[1], self.text_data))
     self.created_at = mintime
     self.first_growth_time = mintime
     self.hourly_growth_rate = [0]
     self.hourly_sentiment = [0]
     self.hourly_accum_sentiment = [0]
     self.hourly_keywords = [['']]
     self.hourly_tags = [['']]
     hourly_tweets = dict()
     hourly_tweets_accum = dict()
     maxidx=0
     for tw in zip(self.documents, self.text_data):
         idx = int((tw[1][1]-mintime)/3600/1000)
         if idx > maxidx:
             maxidx = idx
             
         while idx >= len(self.hourly_growth_rate):
             self.hourly_growth_rate.append(0)
             self.hourly_sentiment.append(0)
             self.hourly_accum_sentiment.append(0)
             self.hourly_keywords.append([''])
             self.hourly_tags.append([''])
             
         self.hourly_growth_rate[idx] += 1
         if idx not in hourly_tweets:
             hourly_tweets[idx] = []
         hourly_tweets[idx].append(tw)
         for i in range(idx+1):
             if i not in hourly_tweets_accum:
                 hourly_tweets_accum[i] = []
             hourly_tweets_accum[i].append(tw)
         
     for idx in range(maxidx+1):
         if idx in hourly_tweets_accum:
             hta = list(map(lambda x: x[1][0][0],(hourly_tweets_accum[idx])))
             tmp = cluster_exporter.getTagsForTexts(self, hta)
             self.hourly_keywords[idx] = cluster_exporter.get_keywords_for_message_list(hta, idfs)[:3]
             if tmp is not None:
                 self.hourly_tags[idx] = tmp[:3]
             else:
                 self.hourly_tags[idx] = ['', '', '']
          
         if idx in hourly_tweets:    
             cluster_vector = np.mean(list(map(lambda x: x[0],(hourly_tweets[idx]))), axis=0)
             ht = list(map(lambda x: x[1][0][0],(hourly_tweets[idx])))                
             self.hourly_sentiment[idx]=getSentiment(cluster_vector)  
             self.hourly_growth_rate[idx] = len(hourly_tweets[idx])
             
         if idx in hourly_tweets_accum:
             if len(hourly_tweets_accum[idx]) > 0:
                 cluster_vector = np.mean(list(map(lambda x: x[0],(hourly_tweets_accum[idx]))), axis=0)
                 self.hourly_accum_sentiment[idx]=getSentiment(cluster_vector)  
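Side note: the index computed above assumes tweet timestamps are in milliseconds (hence the /3600/1000). A tiny stand-alone sketch of the same bucketing, with an illustrative value:

def hour_bucket(timestamp_ms, mintime_ms):
    # same formula as above: milliseconds -> whole hours since the first tweet
    return int((timestamp_ms - mintime_ms) / 3600 / 1000)

print(hour_bucket(5400000, 0))  # a tweet 90 minutes after the first one lands in bucket 1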
Example #2
def Sentiment():
    
    text = request.values.get("text")
    score = sentiment.getSentiment(text)
    data = {"score" : score}

    return json.dumps(data)
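The route decorator is not shown in this excerpt; a minimal sketch of how such a view might be registered, assuming a Flask app object and a /sentiment URL (both are assumptions, not taken from the source):

# Hypothetical wiring; the route path and app object are assumptions.
from flask import Flask

app = Flask(__name__)
app.add_url_rule("/sentiment", view_func=Sentiment, methods=["POST"])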
Example #3
def generateReply(message):
    pos = getPOS(message)
    sentiment = getSentiment(message)

    # If error occurred getting POS
    if not pos:
        return "I am not functioning at the moment. Perhaps check your API keys."

    # If user greeted
    if pos[0][0].lower() in greetings:
        return random.choice(greetings_responses)

    # If user said 'You are ... {adjective} ...'
    youAreJJ = findYouAreJJ(pos)
    if youAreJJ:
        if sentiment >= 0.5:
            return "Thank you, I know I'm " + youAreJJ + "."
        else:
            return "No! I'm not " + youAreJJ + "!"

    # If user said 'I am ... {adjective} ...'
    IAmJJ = findIAmJJ(pos)
    if IAmJJ:
        if sentiment >= 0.5:
            return "I'm happy for you that you're " + IAmJJ + "."
        else:
            return "Don't be mean on yourself. I'm sure you're not really " + IAmJJ + "!"

    if sentiment > 0.5:
        return "I'm happy to hear that!"
    else:
        return "I feel sad about that."
Example #4
def board_list():

    sentiment_result = getSentiment()
    world_result = getWordCloud()

    print(sentiment_result)

    return render_template("spray.html")
Example #5
def symbolSentiment(symbol, methods=["GET", "POST"]):

    if request.method == "POST":

        symbol = request.form["symbol"]

    sentimentData = getSentiment(symbol)

    return sentimentData
Example #6
def generateExperimentGroupResponse(user_message, user_id=0):
    """
    Given message from user, generates an answer to the user.
    """
    user_sentiment = getSentiment(user_message)

    context = chatbot.create_client_context(user_id)    
    
    bot_message = context.bot.ask_question(context, user_message)
    bot_sentiment = getSentiment(bot_message)

    positive_user = user_sentiment > 0

    attempts = 1
    # If the user's sentiment is negative, the bot's response should be more negative, and vice versa for positive sentiment
    while ((positive_user and (bot_sentiment >= user_sentiment)) or ((not positive_user) and (bot_sentiment <= user_sentiment))):

        if attempts > SYNONYM_SWAP_ATTEMPT_LIMIT:
            break

        try:
            candidate_message = mutateMessage(bot_message)
            candidate_sentiment = getSentiment(candidate_message)

            if positive_user:
                if candidate_sentiment > bot_sentiment:
                    bot_sentiment = candidate_sentiment
                    bot_message = candidate_message
            else:
                if candidate_sentiment < bot_sentiment:
                    bot_sentiment = candidate_sentiment
                    bot_message = candidate_message

        except SynonymNotFound:
            # If swapping a synonym is not possible, it will not be possible on subsequent tries either
            attempts += SYNONYM_SWAP_ATTEMPT_LIMIT
            break

        attempts += 1
    
    bot_emoji = getEmoji(bot_message)

    return bot_message + bot_emoji
Example #7
def index():
    if request.form:
        # Multipart Form
        data = request.form
    else:
        # JSON
        data = request.get_json()

    text = data.get("text")
    sentiment_threshold = float(
        data.get("sentiment_threshold", DEFAULT_SENTIMENT_THRESHOLD))
    return jsonify({"words": getSentiment(text, sentiment_threshold)})
Example #8
def DoodleView(request, id):
    # fetch the doodle once and score each of its comments
    current_doodle = Doodle.objects.get(pk=id)
    comments = current_doodle.comment_set.all()
    scores = {}
    num_bad = 0
    for comment in comments:
        scores[comment.id] = sentiment.getSentiment(comment.comment_text)
        if scores[comment.id] < 0:
            num_bad += 1

    context = {"doodle": current_doodle, "comments": comments, "scores": scores, "num_bad": num_bad}
    return render(request, "Doodlers/doodle.html", context)
Example #9
def bulkJsonData(json_file, _index, whatStuff):
    json_list = c.getDataFromFile(json_file)
    for doc in json_list:

        json_doc = json.loads(doc)

        sentiment = [0, 0, 0]

        # clean the text in comments and title of special characters and emojis after JSON conversion
        if "data" in json_doc:
            my_text_location = json_doc["data"][0]["comment"]
            my_text = my_text_location["comment"]

            #get sentiment
            sentiment = s.getSentiment(my_text)

            clean_my_text = c.cleanText(my_text)
            my_text_location.update([("comment", clean_my_text)])
            json_doc.update([("all_text", clean_my_text)])

            if "group" in my_text_location:
                my_group = my_text_location["group"]
                clean_my_group = c.cleanText(my_group)
                my_text_location.update([("group", clean_my_group)])

        my_title = json_doc["title"]
        clean_my_title = c.cleanText(my_title)
        json_doc.update([("title", clean_my_title)])

        # add sentiment
        json_doc.update([("mySentiment", sentiment[0])])
        json_doc.update([("sentPositive", sentiment[1])])
        json_doc.update([("sentNegative", sentiment[2])])

        # add load_type, used later for filter
        json_doc.update([("load_type", whatStuff)])
        json_doc.update([("source_type", "facebook")])
        new_doc = str(json_doc).replace("'", '"')

        # use a 'yield' generator so that the data isn't loaded into memory
        if '{"index"' not in new_doc:
            yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}
Example #10
def doc_sentiment(params, data):
    print(params['begin_date'], params['fq'])
    parsed = get_json(params)
    ret = 0
    status = parsed['status']
    if status == "OK":
        resp = parsed['response']
        docs = resp['docs']
        for doc in docs:
            sentiment = 0
            for p in doc:
                var = doc[p]
                if var is not None:
                    if p == "headline":
                        var = var['main']
                    if p == "keywords":
                        var = [e['value'] for e in var]
                        var = " ".join(var)
                    sentiment += getSentiment(var)
            ret += sentiment
    return ret
Example #12
def tweets_handler(api, query, cur_date, prev_date, sentiments, all_locs):
    """
    Handles the operations on tweets: extracting them from the Twitter API,
    cleaning them, and computing their sentiment.
    """
    global count1
    # Calling subroutine to get tweet objects from specified range
    tweets = getTweets(api, query, cur_date, prev_date)
    count1 += 1
    if count1 == 7:
        print("Data Collected", flush=True)
        print("Calculating Sentiments", flush=True)

    # Calling subroutine to open up the tweet batches
    tweets = open_tweet_obj(tweets)

    # Calling subroutine to remove duplicate tweets, if given by twitter
    tweets = remove_duplicate_tweets(tweets)

    # Calling subroutine to extract tweet_text and loc of tweeter from tweet object, now tweets = {"text": ..,"loc":..}
    tweets = extract_data(tweets)

    # calling subroutine for removing promotional tweets
    tweets = remove_promotional_tweets(tweets)

    # calling subroutine cleaning the tweets
    # tweets_text = tweet_cleaner(tweets)

    cur_day_locations = make_loc_dict(tweets)

    # calling subroutine for getting sentiment from the tweets
    cur_day_sentiment = getSentiment(tweets, cur_day_locations)

    # updating sentiments list
    thread_no = current_thread().name
    sentiments[int(thread_no) - 1] = cur_day_sentiment
    all_loc = merge(all_locs, cur_day_locations)
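The handler reads its result slot from the thread name, and count1 == 7 suggests seven worker threads; a sketch of how the caller might launch them with numeric names (this wiring is an assumption, not taken from the source; api, query, and dates stand in for the caller's real objects):

# Hypothetical launcher; thread names "1".."7" match the int(thread_no) - 1 indexing above.
from threading import Thread

sentiments = [None] * 7
all_locs = {}
threads = [
    Thread(target=tweets_handler, name=str(n + 1),
           args=(api, query, dates[n + 1], dates[n], sentiments, all_locs))
    for n in range(7)
]
for t in threads:
    t.start()
for t in threads:
    t.join()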
Example #13
def getSentiment(path):

    new_data=[]

    # facebook post
    if os.path.isfile(path+'json-facebook_data/posts/your_posts_1.json'):
        json_file = open(path+'json-facebook_data/posts/your_posts_1.json', encoding="utf8", errors='ignore')
        json_list = [line.strip() for line in json_file]

        for doc in json_list:
            json_doc = json.loads(doc)

            if 'all_text' in json_doc:
                my_text = json_doc["all_text"]
                my_time = json_doc["timestamp"]
                sentiment = s.getSentiment(my_text)

                new_data.append({"date": my_time , "mySentiment": sentiment[0] ,"sentPositive" : sentiment[1], "sentNegative": sentiment[2], "type": "post", "source" : "facebook"})


    # facebook post group
    if os.path.isfile(path+'json-facebook_data/groups/your_posts_and_comments_in_groups_fixed.json'):
        json_file1 = open(path+'json-facebook_data/groups/your_posts_and_comments_in_groups_fixed.json', encoding="utf8", errors='ignore')
        json_list1 = [line.strip() for line in json_file1]

        for doc in json_list1:
            json_doc = json.loads(doc)
            
            my_time = json_doc["timestamp"]

            if 'data' in json_doc:
                for dt in json_doc['data']:
                    if 'post' in dt:
                        my_text = dt["post"]
                        sentiment = s.getSentiment(my_text)

                        new_data.append({"date": my_time , "mySentiment": sentiment[0] ,"sentPositive" : sentiment[1], "sentNegative": sentiment[2], "type": "postGroup", "source" : "facebook"})



    # facebook comments
    if os.path.isfile(path+'json-facebook_data/comments/comments.json'):
        json_file2 = open(path+'json-facebook_data/comments/comments.json', encoding="utf8", errors='ignore')
        json_list2 = [line.strip() for line in json_file2]

        for doc in json_list2:
            json_doc = json.loads(doc)
            my_time = json_doc["timestamp"]

            if "data" in json_doc:
                my_text_location = json_doc["data"][0]["comment"]
                my_text = my_text_location["comment"]
                
                sentiment = s.getSentiment(my_text)

                new_data.append({"date": my_time , "mySentiment": sentiment[0] ,"sentPositive" : sentiment[1], "sentNegative": sentiment[2], "type": "comment", "source" : "facebook"})



    # twitter
    if os.path.isfile(path+'json-twitter_data/tweet.json'):
        json_file3 = open(path+'json-twitter_data/tweet.json', encoding="utf8", errors='ignore')
        json_list3 = [line.strip() for line in json_file3]

        for doc in json_list3:
            json_doc1 = json.loads(doc)

            if 'tweet' in json_doc1:
                json_doc = json_doc1["tweet"]
            else:
                json_doc=json_doc1

            if not json_doc['full_text'].startswith("RT @"):
                my_text = json_doc["full_text"]
                my_time = json_doc["created_at"]
                sentiment = s.getSentiment(my_text)

                new_data.append({"date": my_time , "mySentiment": sentiment[0] ,"sentPositive" : sentiment[1], "sentNegative": sentiment[2], "type": "tweet", "source" : "twitter"})


    return new_data
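Several of these examples index the result of s.getSentiment as sentiment[0], sentiment[1], sentiment[2]; a purely illustrative stub making that assumed [overall, positive, negative] shape explicit (not the module's real implementation):

# Illustrative stub only: mimics the assumed return shape [overall, positive, negative].
def getSentiment_stub(text):
    positive = text.lower().count("good")
    negative = text.lower().count("bad")
    return [positive - negative, positive, negative]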
Example #14
        # dictionary of emotion counts
        emotions = {
            'anger': 0,
            'fear': 0,
            'anticipation': 0,
            'surprise': 0,
            'sadness': 0,
            'joy': 0,
            'disgust': 0,
            'negative': 0,
            'positive': 0
        }

        search_results = api.search(q=trend + '-filter:retweets',
                                    count=100,
                                    lang='en')
        for tweet in search_results:
            #clean tweet, get sentiment, add it to emotion
            text = tweet.text
            text = helper.clean_tweet(text)
            emotion = sentiment.getSentiment(text)
            for e in emotion:
                emotions[e] = emotions.get(e, 0) + 1
        trendsEmotions.append(max(
            emotions, key=emotions.get))  #emotion with most tweets wins

    #change colour of hue
    hue_control.controlHue(mode(trendsEmotions))
    print('done ' + mode(trendsEmotions))
    time.sleep(180)
Example #15
def bulkJsonData(json_file, _index, whatStuff):
	json_list = c.getDataFromFile(json_file)
	for doc in json_list:

		json_doc = json.loads(doc)

		sentiment=[0,0,0]
		
		# use a 'yield' generator so that the data isn't loaded into memory
		if '{"index"' not in doc:

			# clean the text in comments and title of special characters and emojis after JSON conversion
			if 'data' in json_doc:
				for dt in json_doc['data']:
					if 'post' in dt:
						my_text = dt["post"]

						#get sentiment
						sentiment = s.getSentiment(my_text)

						clean_my_text = c.cleanText(my_text)
						dt.update([ ("post", clean_my_text) ])
						json_doc.update([ ("all_text", clean_my_text) ])	

			if 'attachments' in json_doc:
				for att in json_doc['attachments']:
					if 'data' in att:
						for dt in att['data']:
							if 'external_context' in dt:
								if 'name' in dt["external_context"]:
									my_text2 = dt["external_context"]["name"]
									clean_my_text2 = c.cleanText(my_text2)
									dt["external_context"].update([ ("name", clean_my_text2) ])	 

							if 'media' in dt:
								my_title2 = dt['media']['title']
								clean_my_title2 = c.cleanText(my_title2)
								dt['media'].update([ ("title", clean_my_title2) ])	 

								if 'description' in dt['media']:
									my_description = dt['media']["description"]
									clean_my_description = c.cleanText(my_description)
									dt['media'].update([ ("description", clean_my_description) ])

							if 'place' in dt:
								my_loc = dt["place"]["coordinate"]
								my_lat = my_loc["latitude"]
								my_lon = my_loc["longitude"]
								new_my_loc = [my_lon,my_lat]
								dt["place"].update([ ("location", new_my_loc) ])
			 		
			if 'title' in json_doc:
				my_title = json_doc["title"]
				clean_my_title = c.cleanText(my_title)
				json_doc.update([ ("title", clean_my_title) ])	

			# add sentiment
			json_doc.update([ ("mySentiment", sentiment[0]) ]) 
			json_doc.update([ ("sentPositive", sentiment[1]) ]) 
			json_doc.update([ ("sentNegative", sentiment[2]) ]) 

			# add load_type, used later for filter
			json_doc.update([ ("load_type", whatStuff) ]) 
			json_doc.update([ ("source_type", "facebook") ])
			new_doc = str(json_doc).replace("'", '"')


			yield {
				"_index": _index,
				"_id": uuid.uuid4(),
				 "_source": new_doc
			}
Example #16
def product(id):

    fileName = id + ".txt"
    srcComment = "comments/" + fileName
    srcPhrase = "phrases/" + fileName
    # if this has already been searched, load the cache
    if os.path.exists(srcPhrase):
        print("loading cache of phrase")
    else:
        if os.path.exists(srcComment):
            print("loading cache of text")
        else:
            f = open('id.txt', 'w')
            f.write(id)
            f.close()

            scrapper('id.txt')
            print('********** scraping finished *************')

        # move the review to the autophrase directory
        desComment = "../AutoPhrase/data/"
        copyfile(srcComment, desComment + 'review.txt')
        #subprocess.call(['../AutoPhrase/auto_phrase.sh'])
        call("./auto_phrase.sh", cwd="../AutoPhrase", shell=True)
        copyfile("../AutoPhrase/models/AutoPhrase.txt", "phrases/" + fileName)

    response = {}
    frequencies = []

    phrase_file = 'phrases/' + fileName
    with open(phrase_file) as fp:
        lines = fp.readlines()

        count = 0

        for line in lines:
            if count > 100:
                break
            lineContent = line.split()
            if len(lineContent) >= 2:
                score = float(lineContent[0])
                separator = " "
                phrase = separator.join(lineContent[1:])

                if score == 1:
                    continue

                if count <= 8:
                    frequencies.append({
                        "text": phrase,
                        "size": int(score * 80)
                    })
                elif count <= 40:
                    frequencies.append({
                        "text": phrase,
                        "size": int(score * 50)
                    })
                elif count <= 70:
                    frequencies.append({
                        "text": phrase,
                        "size": int(score * 40)
                    })
                elif count <= 100 and score > 0.3:
                    frequencies.append({
                        "text": phrase,
                        "size": max(20, int(score * 20))
                    })
            count += 1

    senti = getSentiment(id)

    response['frequency'] = frequencies
    response['sentiment'] = senti

    return jsonify(response)
Example #17
def bulkJsonData(json_file, _index, whatStuff):
    json_list = c.getDataFromFile(json_file)
    for doc in json_list:
        # use a 'yield' generator so that the data isn't loaded into memory
        if '{"index"' not in doc:

            json_doc1 = json.loads(doc)

            sentiment = [0, 0, 0]

            if 'tweet' in json_doc1:
                json_doc = json_doc1["tweet"]
            else:
                json_doc = json_doc1

            my_text = json_doc["full_text"]

            #get sentiment
            if not my_text.startswith("RT"):
                sentiment = s.getSentiment(my_text)

            clean_my_text = c.cleanText(my_text)
            json_doc.update([("full_text", clean_my_text)])

            my_text2 = json_doc["source"]
            clean_my_text2 = c.cleanText(my_text2)
            json_doc.update([("source", clean_my_text2)])

            # Does not like "False", needed to be "false" !!!
            #my_text1 = json_doc["retweeted"]
            #clean_my_text1 = c.cleanText(str(my_text1))
            #json_doc.update([ ("retweeted", clean_my_text1) ])

            #if 'truncated' in json_doc:
            #  my_text3 = json_doc["truncated"]
            #  clean_my_text3 = c.cleanText(str(my_text3))
            #  json_doc.update([ ("truncated", clean_my_text3) ])

            #if 'favorited' in json_doc:
            #  my_text4 = json_doc["favorited"]
            #  clean_my_text4 = c.cleanText(str(my_text4))
            #  json_doc.update([ ("favorited", clean_my_text4) ])

            #if 'possibly_sensitive' in json_doc:
            #  my_text5 = json_doc["possibly_sensitive"]
            #  clean_my_text5 = c.cleanText(str(my_text5))
            #  json_doc.update([ ("possibly_sensitive", clean_my_text5) ])

            if 'in_reply_to_screen_name' in json_doc:
                my_name = json_doc["in_reply_to_screen_name"]
                clean_my_name = c.cleanText(my_name)
                json_doc.update([("in_reply_to_screen_name", clean_my_name)])

            if 'user_mentions' in json_doc["entities"]:
                for usr in json_doc["entities"]['user_mentions']:
                    my_name1 = usr["name"]
                    clean_my_name1 = c.cleanText(my_name1)
                    usr.update([("name", clean_my_name1)])

                    my_name2 = usr["screen_name"]
                    clean_my_name2 = c.cleanText(my_name2)
                    usr.update([("screen_name", clean_my_name2)])

                for usr in json_doc["entities"]['urls']:
                    my_name3 = usr["url"]
                    clean_my_name3 = c.cleanText(my_name3)
                    usr.update([("url", clean_my_name3)])

                    my_name4 = usr["expanded_url"]
                    clean_my_name4 = c.cleanText(my_name4)
                    usr.update([("expanded_url", clean_my_name4)])

                    my_name5 = usr["display_url"]
                    clean_my_name5 = c.cleanText(my_name5)
                    usr.update([("display_url", clean_my_name5)])

            #if 'media' in json_doc:
            #  my_media = json_doc["media"]
            #  if 'additional_media_info' in my_media:
            #    my_name6 = my_media["additional_media_info"]
            #    clean_my_name6 = c.cleanText(my_name6)
            #    my_media.update([ ("additional_media_info", clean_my_name6) ])

            # add sentiment
            json_doc.update([("mySentiment", sentiment[0])])
            json_doc.update([("sentPositive", sentiment[1])])
            json_doc.update([("sentNegative", sentiment[2])])

            # add load_type, used later for filter
            json_doc.update([("load_type", whatStuff)])
            json_doc.update([("source_type", "twitter")])
            new_doc = str(json_doc).replace("'", '"')
            #print (new_doc)

            new_doc = new_doc.replace("False", "false")
            new_doc = new_doc.replace("True", "true")

            yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}
Example #18
def SetValues():  # returns the floating-point sentiment values to the front end
    comments = request.form.getlist('comments[]')
    links = request.form.getlist('links[]')
    return jsonify({"data": getSentiment(comments, links)})#ValueArray
usernames = json.load(usernamesFile)

for i, d in enumerate(usernames):
    print("checking", d['pageID'])
    con = sqlite3.connect('scraperwiki.sqlite')
    query = 'select * from comments where pageID="{pageID}"'.format(
        pageID=d['pageID'])
    table = sql.read_sql(query, con)

    # print table

    rows = table.iterrows()

    abuseTweets = []
    for row in rows:
        sentimentResult = sentiment.getSentiment(row[1]['message'])
        print(sentimentResult)
        results.append({
            "pol_id": i,
            "partyCode": d["partyCode"],
            "gender": d["gender"],
            "message": row[1]['message'],
            "positive": sentimentResult['positive'],
            "negative": sentimentResult['negative'],
            "createdTime": row[1]['createdTime']
        })

    # print results
    with open('facebook-sentiment-test.csv', 'w') as csvoutput:
        dict_writer = csv.DictWriter(csvoutput, results[0].keys())
        dict_writer.writeheader()