Example #1
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import database_helper  # project-local database access module


def get_tweet_sentiments_scores(movieId):
    """
    Assign sentiment scores to a movie's tweets using its movieId.

    :param movieId: integer movieId from the movies table
    :return: pandas DataFrame of movie tweets with sentiment scores and classifications
    """

    #initialise VADER
    analyser = SentimentIntensityAnalyzer()

    #get movie tweets from db
    tweets = database_helper.select_geo_tweets(movieId)

    #list to convert to df for return
    sent_lst = []
    for index, row in tweets.iterrows():

        #get tweet sentiment
        sentiment = analyser.polarity_scores(row['msg'])
        lst_row = {
            "id": row["id"],
            "negative_scr": sentiment["neg"],
            "positive_scr": sentiment["pos"],
            "neutral_scr": sentiment["neu"],
            "compound_scr": sentiment["compound"],
            "senti_class": classify_sentiment(sentiment)
        }
        sent_lst.append(lst_row)

    #return df of movie tweets
    return pd.DataFrame(sent_lst)
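
The classify_sentiment helper called above is not shown in this example. A minimal sketch of it, assuming the standard VADER convention of thresholding the compound score at +/-0.05 (the project's actual helper may differ):

def classify_sentiment(sentiment):
    """Hypothetical helper: map a VADER score dict to a sentiment class."""
    # +/-0.05 on the compound score is the threshold recommended by the
    # VADER authors for positive/negative/neutral classification
    if sentiment["compound"] >= 0.05:
        return "positive"
    elif sentiment["compound"] <= -0.05:
        return "negative"
    return "neutral"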
Example #2
import geopandas as gpd
from geopandas.tools import sjoin

import database_helper  # project-local database access module


def get_tweet_regions(movieId):
    """
    Retrieve a movie's tweets with their respective region names.

    :param movieId: integer movieId from the movies table
    :return: GeoDataFrame of region-tagged movie tweets
    """

    #get geodataframe of movie tweets
    tweets = database_helper.select_geo_tweets(movieId)
    tweets.dropna(subset=["geombng"], inplace=True)

    #use Ordnance Survey shapefile to attach region names
    gb = gpd.read_file("../../ProjectData/Data/GB/european_region_region.shp")
    gb_tweets = sjoin(tweets, gb, how='inner')
    gb_tweets["region"] = gb_tweets["NAME"].str.replace("Euro Region",
                                                        "").str.strip()

    #return geodataframe of region tagged tweets
    return gb_tweets
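
A hypothetical usage sketch (movieId 1 is an assumption), tallying the region-tagged tweets per region:

region_tweets = get_tweet_regions(1)
print(region_tweets["region"].value_counts())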
Example #3
import matplotlib.pyplot as plt
import geopandas as gpd
from geopandas.tools import sjoin

import database_helper  # project-local database access module

# test = sns.kdeplot(gb_tweets['lng'], gb_tweets['lat'],
#                    shade=True, shade_lowest=False, cmap='viridis',
#                    ax=ax)
#
# ax.set_axis_off()
# plt.axis('equal')
# plt.title(self.title + " Tweet Density")
# plt.show()
# plt.clf()
# plt.cla()
# plt.close()

fig, ax = plt.subplots(1, figsize=(9, 9))

gb = gpd.read_file("../../ProjectData/Data/GB/european_region_region.shp")
tweets = database_helper.select_geo_tweets(1)
tweets.dropna(subset=["geombng"], inplace=True)

gb_tweets = sjoin(tweets, gb, how='inner')

# extract point coordinates for plotting (named lat/lng, though
# geombng is a projected British National Grid geometry)
gb_tweets["lat"] = gb_tweets["geombng"].y
gb_tweets["lng"] = gb_tweets["geombng"].x

# bounding box of each UK fishnet grid cell
grid = database_helper.select_uk_fishnet()
grid["minx"] = grid["geombng"].bounds.minx
grid["miny"] = grid["geombng"].bounds.miny
grid["maxx"] = grid["geombng"].bounds.maxx
grid["maxy"] = grid["geombng"].bounds.maxy

gb["minx"] = grid["geombng"].bounds.minx
gb["miny"] = grid["geombng"].bounds.miny
gb["maxx"] = grid["geombng"].bounds.maxx
Example #4
from colour import Color
import numpy as np
import scipy.signal
import scipy.ndimage as ndi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nameparser import HumanName
from datetime import datetime, timedelta

import database_helper  # project-local database access module
import movie_helper  # project-local movie metadata module

movie = movie_helper.get_movie_by_id(28)

# optionally restrict tweets to a window around the movie's theatrical run
#start_date = datetime.combine((movie.ukReleaseDate - timedelta(days=14)), datetime.min.time())
#end_date = datetime.combine((movie.first_run_end - timedelta(days=14)), datetime.min.time())
start_date = None
end_date = None
   
tweets = database_helper.select_geo_tweets(movie.movieId, start_date, end_date)
tweets = tweets.sort_values(by=['created_at']).reset_index()
times = tweets['created_at']

# minutes remaining until midnight for each tweet timestamp
times_tot_mins = 24 * 60 - (60 * np.array([t.hour for t in times])
                            + np.array([t.minute for t in times]))

# separations (in seconds) between consecutive tweets
seps = np.array([(times[i] - times[i - 1]).total_seconds()
                 for i in range(1, len(times))])
seps[seps == 0] = 1  # convert zero-second separations to 1-second separations

seps = seps.astype(int)

# pair each separation with the next one
sep_array = np.zeros((len(seps) - 1, 2))  # 1st column: x-coords, 2nd column: y-coords
sep_array[:, 0] = seps[:-1]
sep_array[:, 1] = seps[1:]

sep_array = sep_array.astype(int)
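
sep_array now pairs each inter-tweet gap with the one that follows it. A hedged sketch of one way to use it, not shown in the original: a log-scaled hexbin of the pairs, where clusters along regular intervals can reveal automated posting patterns.

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 8))
# both axes log-scaled; seps was floored at 1 second, so log is safe
ax.hexbin(sep_array[:, 0], sep_array[:, 1], gridsize=50,
          xscale='log', yscale='log', bins='log', cmap='viridis')
ax.set_xlabel("seconds since previous tweet")
ax.set_ylabel("seconds until next tweet")
plt.show()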